1
1

Bring the map fixes into the main trunk. This should fix several problems, including the multiple app_context issue.

I have tested on rsh, slurm, bproc, and tm. Bproc continues to have a problem (will be asking for help there).

Gridengine compiles but I cannot test (believe it likely will run).

Poe and xgrid compile to the extent they can without the proper include files.

This commit was SVN r12059.
Этот коммит содержится в:
Ralph Castain 2006-10-07 15:45:24 +00:00
родитель 5dbe5c7442
Коммит ae79894bad
34 изменённых файлов: 2176 добавлений и 2727 удалений

Просмотреть файл

@ -259,7 +259,7 @@ int orte_errmgr_bproc_register_job(orte_jobid_t job)
}
/* send the request */
if (0 > orte_rml.send_buffer(orte_errmgr_proxy_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) {
if (0 > orte_rml.send_buffer(orte_errmgr_bproc_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(cmd);
return ORTE_ERR_COMM_FAILURE;
@ -274,7 +274,7 @@ int orte_errmgr_bproc_register_job(orte_jobid_t job)
}
/* enter a blocking receive until we hear back */
if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_ERRMGR)) {
if (0 > orte_rml.recv_buffer(orte_errmgr_bproc_globals.replica, answer, ORTE_RML_TAG_ERRMGR)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -65,9 +65,14 @@ int orte_ns_base_print_name(char **output, char *prefix, orte_process_name_t *na
/* set default result */
*output = NULL;
if (NULL == name) {
asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: NULL",
(NULL == prefix ? " " : prefix));
} else {
asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: [%lu,%lu,%lu]",
(NULL == prefix ? " " : prefix), (unsigned long)name->cellid,
(unsigned long)name->jobid, (unsigned long)name->vpid);
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -137,15 +137,3 @@ int orte_odls_bproc_component_close(void)
OBJ_DESTRUCT(&mca_odls_bproc_component.children);
return ORTE_SUCCESS;
}
/*
 * Finalize the bproc ODLS component: drain the component's list of
 * child-process tracking objects and release each one.  Items are
 * reference-counted OPAL objects, so OBJ_RELEASE frees an item once
 * its last reference is dropped.  Always returns ORTE_SUCCESS.
 */
int orte_odls_bproc_component_finalize(void)
{
opal_list_item_t *item;
/* cleanup state */
/* remove entries one at a time until the children list is empty */
while (NULL != (item = opal_list_remove_first(&mca_odls_bproc_component.children))) {
OBJ_RELEASE(item);
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -53,7 +53,7 @@ OBJ_CLASS_INSTANCE(orte_pls_daemon_info_t, /* type name */
/*
* Store the active daemons for a job
*/
int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
int orte_pls_base_store_active_daemons(opal_list_t *daemons)
{
orte_pls_daemon_info_t *dmn;
opal_list_item_t *item;
@ -64,6 +64,10 @@ int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
/* determine the number of daemons */
num_daemons = opal_list_get_size(daemons);
if (0 == num_daemons) {
return ORTE_SUCCESS;
}
/* since each daemon gets recorded in a separate node's container,
* we need to allocate space for num_daemons value objects
*/
@ -74,15 +78,6 @@ int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
}
memset(values, 0, num_daemons*sizeof(orte_gpr_value_t*)); /* NULL the array */
/* setup the key */
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, job))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
return rc;
}
asprintf(&key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string);
free(jobid_string);
/* loop through the values and the list and create all the value objects */
item = opal_list_get_first(daemons);
for (i=0; i < num_daemons; i++) {
@ -102,6 +97,15 @@ int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
goto CLEANUP;
}
/* setup the key */
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, dmn->active_job))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
return rc;
}
asprintf(&key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string);
free(jobid_string);
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[0]), key, ORTE_NAME, dmn->name))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
@ -140,7 +144,10 @@ int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job)
NULL
};
orte_cellid_t *cell;
char *nodename;
orte_process_name_t *name;
orte_pls_daemon_info_t *dmn;
bool found_name, found_node, found_cell;
int rc;
/* setup the key */
@ -164,27 +171,29 @@ int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job)
/* loop through the answers and construct the list */
for (i=0; i < cnt; i++) {
/* each container should have only one set of values */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
if (NULL == dmn) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto CLEANUP;
}
/* for systems such as bproc, the node segment holds containers
* for nodes that we may not have launched upon. Each container
* will send us back a value object, so we have to ensure here
* that we only create daemon objects on the list for those nodes
* that DO provide a valid object
*/
found_name = found_node = found_cell = false;
for (j=0; j < values[i]->cnt; j++) {
kv = values[i]->keyvals[j];
if (0 == strcmp(kv->key, keys[0])) {
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), kv->value->data, ORTE_NAME))) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&name, kv->value, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
found_name = true;
continue;
}
if (0 == strcmp(kv->key, ORTE_NODE_NAME_KEY)) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->nodename), kv->value->data, ORTE_STRING))) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&nodename, kv->value, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
found_node = true;
continue;
}
if (0 == strcmp(kv->key, ORTE_CELLID_KEY)) {
@ -192,12 +201,32 @@ int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job)
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
dmn->cell = *cell;
found_cell = true;
continue;
}
}
/* if we found everything, then this is a valid entry - create
* it and add it to the list
*/
if (found_name && found_node && found_cell) {
dmn = OBJ_NEW(orte_pls_daemon_info_t);
if (NULL == dmn) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(dmn);
goto CLEANUP;
}
dmn->cell = *cell;
if (NULL != nodename) {
dmn->nodename = strdup(nodename);
}
/* add this daemon to the list */
opal_list_append(daemons, &dmn->super);
}
OBJ_RELEASE(values[i]);
}
@ -212,5 +241,23 @@ CLEANUP:
}
/*
* Retrieve the active daemon(s) for a specific node
* Remove a daemon from the world of active daemons
*/
/*
 * Remove a daemon from the set of active daemons.
 *
 * NOTE(review): this is currently a stub.  It fetches the active
 * daemons for info->active_job but never searches the list or deletes
 * anything, and the in-code comment below says the real implementation
 * should instead issue a registry delete targeting this entry.
 *
 * NOTE(review): the locally constructed 'daemons' list is never
 * emptied or OBJ_DESTRUCTed on either return path — presumably a
 * leak of the list items returned by get_active_daemons; verify and
 * add cleanup when this is implemented.
 */
int orte_pls_base_remove_daemon(orte_pls_daemon_info_t *info)
{
opal_list_t daemons;
int rc;
OBJ_CONSTRUCT(&daemons, opal_list_t);
/* We actually don't want to do this - instead, we need to do a registry
 * delete function call targeting this entry
 */
if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, info->active_job))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* find this item in the list */
/* TODO: locate info in 'daemons' and remove its registry entry */
return ORTE_SUCCESS;
}

Просмотреть файл

@ -58,6 +58,7 @@ int orte_pls_base_orted_exit(opal_list_t *daemons)
item = opal_list_get_next(item)) {
dmn = (orte_pls_daemon_info_t*)item;
opal_output(0, "sending exit cmd to daemon [%ld,%ld,%ld]", ORTE_NAME_ARGS(dmn->name));
if (0 > orte_rml.send_buffer(dmn->name, &cmd, ORTE_RML_TAG_PLS_ORTED, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_DESTRUCT(&cmd);

Просмотреть файл

@ -80,7 +80,8 @@ extern "C" {
int orte_pls_base_orted_add_local_procs(opal_list_t *daemons, orte_gpr_notify_data_t *ndat);
int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job);
int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job);
int orte_pls_base_store_active_daemons(opal_list_t *daemons);
int orte_pls_base_remove_daemon(orte_pls_daemon_info_t *info);
/*
* communications utilities

Просмотреть файл

@ -60,17 +60,14 @@
#include "orte/mca/ns/ns.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/ras/base/base.h"
#include "orte/mca/ras/ras.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/smr/smr.h"
#include "orte/runtime/orte_wait.h"
#include "orte/runtime/runtime.h"
/* remove this when moved to 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/pls/base/pls_private.h"
#include "pls_bproc.h"
@ -104,7 +101,7 @@ orte_pls_base_module_t orte_pls_bproc_module = {
};
static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
static int orte_pls_bproc_node_array(orte_job_map_t* map,
int ** node_array, int * node_array_len);
static int orte_pls_bproc_node_list(int * node_array, int node_array_len,
int ** node_list, int * num_nodes,
@ -123,12 +120,12 @@ static int bproc_vexecmove(int nnodes, int *nodes, int *pids, const char *cmd,
#endif
static void orte_pls_bproc_setup_env(char *** env);
static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
int ** node_arrays, int * node_array_lens,
int num_contexts, int num_procs,
orte_job_map_t *map,
orte_vpid_t global_vpid_start,
orte_jobid_t jobid, int* num_daemons);
static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
orte_rmaps_base_map_t* map, int num_processes,
orte_job_map_t* map, int num_processes,
int num_slots,
orte_vpid_t vpid_start,
orte_vpid_t global_vpid_start,
int app_context,
@ -144,7 +141,7 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
* @retval >=0 the number of processes
* @retval <0 orte err
*/
static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
static int orte_pls_bproc_node_array(orte_job_map_t* map,
int ** node_array, int * node_array_len) {
opal_list_item_t* item;
int num_procs = 0;
@ -156,8 +153,8 @@ static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
if(*node_array_len < atol(((orte_rmaps_base_node_t*)item)->node->node_name)) {
*node_array_len = atol(((orte_rmaps_base_node_t*)item)->node->node_name);
if(*node_array_len < atol(((orte_mapped_node_t*)item)->nodename)) {
*node_array_len = atol(((orte_mapped_node_t*)item)->nodename);
}
}
(*node_array_len)++;
@ -172,9 +169,9 @@ static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_node_t* node = (orte_rmaps_base_node_t*)item;
num_on_node = opal_list_get_size(&node->node_procs);
(*node_array)[atol(node->node->node_name)] += num_on_node;
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
num_on_node = opal_list_get_size(&node->procs);
(*node_array)[atol(node->nodename)] += num_on_node;
num_procs += num_on_node;
}
return num_procs;
@ -493,14 +490,12 @@ static void orte_pls_bproc_setup_env(char *** env)
* @retval error
*/
static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
int ** node_arrays, int * node_array_lens,
int num_contexts, int num_procs,
orte_job_map_t *map,
orte_vpid_t global_vpid_start,
orte_jobid_t jobid, int *num_launched) {
int * daemon_list = NULL;
int num_nodes = 0;
int num_daemons = 0;
int rc, i, j;
int rc, i;
int * pids = NULL;
int argc;
char ** argv = NULL;
@ -524,26 +519,25 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
*/
OBJ_CONSTRUCT(&daemons, opal_list_t);
/* find the length of the longest node array */
for(i = 0; i < num_contexts; i++) {
if(node_array_lens[i] > num_nodes) {
num_nodes = node_array_lens[i];
}
}
if(NULL == (daemon_list = (int*)malloc(sizeof(int) * num_nodes))) {
/* get the number of nodes in this job and allocate an array for
* their names so we can pass that to bproc - populate the list
* with the node names
*/
num_daemons = opal_list_get_size(&map->nodes);
if(NULL == (daemon_list = (int*)malloc(sizeof(int) * num_daemons))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup;
}
/* create a list of all the nodes that need daemons, which is all the nodes
* that will have at least 1 process */
for(i = 0; i < num_nodes; i++) {
for(j = 0; j < num_contexts; j++) {
if(i < node_array_lens[j] && 0 < *(node_arrays[j] + i)) {
daemon_list[num_daemons++] = i;
break;
}
}
i = 0;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_mapped_node_t *node = (orte_mapped_node_t*)item;
daemon_list[i++] = atoi(node->nodename);
}
/* allocate storage to save the daemon pids */
if(NULL == (pids = (int*)malloc(sizeof(int) * num_daemons))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup;
@ -574,7 +568,7 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
/* set up the base environment so the daemons can get their names once launched */
rc = orte_ns_nds_bproc_put(cellid, daemon_jobid, daemon_vpid_start,
global_vpid_start, num_procs, envp);
global_vpid_start, num_daemons, envp);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -695,7 +689,7 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
}
}
/* store the daemon info */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
*num_launched = num_daemons;
@ -857,19 +851,20 @@ orte_pls_bproc_monitor_nodes(void)
* @retval error
*/
static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
orte_rmaps_base_map_t* map, int num_processes,
orte_job_map_t* map, int num_processes, int num_slots,
orte_vpid_t vpid_start,
orte_vpid_t global_vpid_start,
int app_context, int * node_array,
int node_array_len) {
int * node_list = NULL;
int num_nodes, num_slots, cycle;
int num_nodes, cycle;
int rc, i, j, stride;
int * pids = NULL;
char * var, * param;
orte_process_name_t * proc_name;
struct bproc_io_t bproc_io[3];
orte_rmaps_base_node_t *node;
char **env;
int dbg;
OPAL_TRACE(1);
@ -878,25 +873,16 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* point to the env array for this app_context */
env = map->apps[app_context]->env;
/* set up app context */
asprintf(&param, "%d", app_context);
var = mca_base_param_environ_variable("pls", "bproc", "app_context");
opal_setenv(var, param, true, &map->app->env);
opal_setenv(var, param, true, &env);
free(param);
free(var);
/* in order for bproc processes to properly compute their name,
* we have to provide them with info on the number of slots
* on each node (which is a constant in bproc). We will pass this
* in an appropriate parameter which we set for each app_context
*/
node = (orte_rmaps_base_node_t*)opal_list_get_first(&map->nodes);
if (NULL == node) {
ORTE_ERROR_LOG(ORTE_ERROR);
return ORTE_ERROR;
}
num_slots = node->node->node_slots;
/* set the vpid-to-vpid stride based on the mapping mode */
if (mca_pls_bproc_component.bynode) {
/* we are mapping by node, so we want to set the stride
@ -914,7 +900,7 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
/* and push that value into the process' environment */
asprintf(&param, "%ld", (long)stride);
var = mca_base_param_environ_variable("pls", "bproc", "stride");
opal_setenv(var, param, true, &map->app->env);
opal_setenv(var, param, true, &env);
free(param);
free(var);
@ -943,11 +929,14 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
ORTE_ERROR_LOG(rc);
goto cleanup;
}
fprintf(stderr, "launching app %s\n", map->apps[app_context]->app);
while(0 != num_nodes) {
fprintf(stderr, "\tlaunching cycle %d\n", i);
for (dbg=0; dbg<num_nodes; dbg++) fprintf(stderr, "\t\tlaunching on node %d\n", node_list[dbg]);
/* setup environment so the procs can figure out their names */
rc = orte_ns_nds_bproc_put(cellid, jobid, vpid_start, global_vpid_start,
num_processes, &map->app->env);
num_processes, &env);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -962,21 +951,22 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
opal_output(0, "pls_bproc: launching %d processes", num_nodes);
}
rc = bproc_vexecmove_io(num_nodes, node_list, pids, bproc_io, 3,
map->app->app, map->app->argv, map->app->env);
map->apps[app_context]->app,
map->apps[app_context]->argv, env);
if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "pls_bproc: %d processes launched. First pid: %d",
rc, *pids);
}
if(rc != num_nodes) {
opal_show_help("help-pls-bproc.txt", "proc-launch-number", true,
num_nodes, rc, map->app->app);
num_nodes, rc, map->apps[app_context]->app);
rc = ORTE_ERROR;
goto cleanup;
}
for(j = 0; j < num_nodes; j++) {
if(0 >= pids[j]) {
opal_show_help("help-pls-bproc.txt", "proc-launch-bad-pid", true,
node_list[j], pids[j], errno, map->app->app);
node_list[j], pids[j], errno, map->apps[app_context]->app);
rc = ORTE_ERROR;
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -1056,10 +1046,10 @@ cleanup:
* @retval error
*/
int orte_pls_bproc_launch(orte_jobid_t jobid) {
opal_list_item_t* item, *item2;
opal_list_t mapping;
opal_list_item_t* item;
orte_cellid_t cellid;
orte_rmaps_base_map_t* map;
orte_job_map_t* map;
orte_mapped_node_t *map_node;
orte_vpid_t vpid_launch;
orte_vpid_t vpid_range;
orte_vpid_t vpid_start;
@ -1068,11 +1058,13 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
int ** node_array = NULL;
int * node_array_len = NULL;
int num_processes = 0;
int num_daemons = 0;
int context = 0;
int j;
int num_daemons;
int num_slots;
int context;
int i, j;
orte_std_cntr_t idx;
char cwd_save[OMPI_PATH_MAX + 1];
orte_ras_node_t *ras_node;
OPAL_TRACE(1);
@ -1089,12 +1081,12 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
}
cwd_save[sizeof(cwd_save) - 1] = '\0';
/* query for the application context and allocated nodes */
OBJ_CONSTRUCT(&mapping, opal_list_t);
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_map(jobid, &mapping))) {
/* get the job map */
if(ORTE_SUCCESS != (rc = orte_rmaps.get_job_map(&map, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(ORTE_SUCCESS != (rc = orte_rmgr.get_vpid_range(jobid, &vpid_start,
&vpid_range))) {
ORTE_ERROR_LOG(rc);
@ -1104,30 +1096,18 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
/* get the cellid */
cellid = orte_process_info.my_name->cellid;
/* do a large lock so the processes will not decrement the process count
* until we are done launching */
for (item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
item = opal_list_get_next(item)) {
orte_std_cntr_t i;
map = (orte_rmaps_base_map_t*) item;
orte_dss.dump(0, map, ORTE_JOB_MAP);
for (i = 0; i < map->num_procs; ++i) {
orte_app_context_t *context = map->app;
/* check all of the app_contexts for sanity */
for (i=0; i < map->num_apps; i++) {
/* Check that the cwd is sane. We have to chdir there in
to check the executable, because the executable could
have been specified as a relative path to the wdir */
rc = orte_rmgr.check_context_cwd(context, true);
rc = orte_rmgr.check_context_cwd(map->apps[i], true);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
/* Check that the app exists and is executable */
rc = orte_rmgr.check_context_app(context);
rc = orte_rmgr.check_context_app(map->apps[i]);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
@ -1138,7 +1118,23 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
goto cleanup;
}
}
/* For Bproc, we need to know how many slots were allocated on each
* node so the spawned processes can compute their name. Only Bproc
* needs to do this, so we choose not to modify the mapped_node struct
* to hold this info - bproc can go get it.
*
* Since Bproc also requires that the slots allocated on each node
* be the same, we really only need to lookup a single node. So grab
* the data for the first node on the map
*/
map_node = (orte_mapped_node_t*)opal_list_get_first(&map->nodes);
if (NULL == (ras_node = orte_ras.node_lookup(map_node->cell, map_node->nodename))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
goto cleanup;
}
num_slots = ras_node->node_slots;
OBJ_RELEASE(ras_node);
if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "pls_bproc: --- starting to launch procs ---");
@ -1146,44 +1142,34 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
/* create an array to hold the pointers to the node arrays for each app
* context. Also, create an array to hold the lengths of the node arrays */
node_array = malloc(opal_list_get_size(&mapping) * sizeof(int *));
node_array_len = malloc(opal_list_get_size(&mapping) * sizeof(int *));
node_array = malloc(map->num_apps * sizeof(int *));
node_array_len = malloc(map->num_apps * sizeof(int *));
/* for each application context - create a node array and setup its env */
for(item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
item = opal_list_get_next(item)) {
map = (orte_rmaps_base_map_t*)item;
rc = orte_pls_bproc_node_array(map, &node_array[context],
&node_array_len[context]);
for(i=0; i < map->num_apps; i++) {
rc = orte_pls_bproc_node_array(map, &node_array[i],
&node_array_len[i]);
if(0 > rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
orte_pls_bproc_setup_env(&map->app->env);
orte_pls_bproc_setup_env(&map->apps[i]->env);
num_processes += rc;
context++;
}
/* save the active node names */
idx = 0;
for (item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t *map = (orte_rmaps_base_map_t*) item;
for (item2 = opal_list_get_first(&map->nodes);
item2 != opal_list_get_end(&map->nodes);
item2 = opal_list_get_next(item2)) {
orte_ras_node_t* node = (orte_ras_node_t*) item2;
orte_mapped_node_t* node = (orte_mapped_node_t*) item;
rc = orte_pointer_array_add(&idx, mca_pls_bproc_component.active_node_names,
strdup(node->node_name));
}
strdup(node->nodename));
}
/* setup subscription for each node so we can detect
when the node's state changes, usefull for aborting when
when the node's state changes, useful for aborting when
a bproc node up and dies */
rc = orte_pls_bproc_monitor_nodes();
@ -1193,9 +1179,11 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
goto cleanup;
}
/* launch the daemons on all the nodes which have processes assign to them */
rc = orte_pls_bproc_launch_daemons(cellid, &map->app->env, node_array,
node_array_len, context, num_processes,
/* launch the daemons on all the nodes which have processes assigned to them.
* We need to send along an appropriate environment for the daemons. Since
* there must be at least ONE app_context, we can just take that one
*/
rc = orte_pls_bproc_launch_daemons(cellid, &map->apps[0]->env, map,
vpid_start, jobid, &num_daemons);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
@ -1238,44 +1226,35 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
}
}
context = 0;
vpid_launch = vpid_start;
opal_output(0, "launching apps");
/* for each application context launch the app */
for(item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
item = opal_list_get_next(item)) {
map = (orte_rmaps_base_map_t*)item;
rc = orte_rmgr.check_context_cwd(map->app, true);
for(context=0; context < map->num_apps; context++) {
rc = orte_rmgr.check_context_cwd(map->apps[context], true);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
rc = orte_pls_bproc_launch_app(cellid, jobid, map, num_processes,
vpid_launch, vpid_start, map->app->idx,
rc = orte_pls_bproc_launch_app(cellid, jobid, map, num_processes, num_slots,
vpid_launch, vpid_start, context,
node_array[context], node_array_len[context]);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
free(node_array[context]);
context++;
vpid_launch = vpid_start + mca_pls_bproc_component.num_procs;
}
mca_pls_bproc_component.done_launching = true;
cleanup:
chdir(cwd_save);
while(NULL != (item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(item);
}
if(NULL != node_array) {
free(node_array);
}
if(NULL != node_array_len) {
free(node_array_len);
}
OBJ_DESTRUCT(&mapping);
return rc;
}

Просмотреть файл

@ -81,11 +81,9 @@
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/smr/smr.h"
/* clean up for ORTE 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/pls/pls.h"
#include "orte/mca/pls/base/pls_private.h"
#include "orte/mca/pls/gridengine/pls_gridengine.h"
@ -104,21 +102,10 @@ orte_pls_base_module_t orte_pls_gridengine_module = {
orte_pls_gridengine_finalize
};
/**
* struct used to have enough information to clean up the state of the
* universe if a daemon aborts
*/
struct gridengine_daemon_info_t {
opal_object_t super;
orte_process_name_t *name;
char *nodename;
};
typedef struct gridengine_daemon_info_t gridengine_daemon_info_t;
static OBJ_CLASS_INSTANCE(gridengine_daemon_info_t,
opal_object_t,
NULL, NULL);
static void set_handler_default(int sig);
#if 0
static int update_slot_keyval(orte_ras_node_t* node, int* slot_cnt);
#endif
/**
* Fill the orted_path variable with the directory to the orted
@ -146,7 +133,7 @@ static int orte_pls_gridengine_fill_orted_path(char** orted_path)
*/
static void orte_pls_gridengine_wait_daemon(pid_t pid, int status, void* cbdata)
{
gridengine_daemon_info_t *info = (gridengine_daemon_info_t*) cbdata;
orte_pls_daemon_info_t *info = (orte_pls_daemon_info_t*) cbdata;
int rc;
/* if qrsh exited abnormally, set the daemon's state to aborted
@ -204,16 +191,16 @@ static void orte_pls_gridengine_wait_daemon(pid_t pid, int status, void* cbdata)
*/
int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
{
opal_list_t mapping;
opal_list_item_t* m_item, *n_item;
orte_job_map_t *map;
opal_list_item_t *n_item;
orte_std_cntr_t num_nodes;
orte_vpid_t vpid;
int node_name_index1;
int node_name_index2;
int proc_name_index;
int orted_index;
int call_yield_index;
char *jobid_string;
char *prefix_dir;
char *uri, *param;
char **argv;
int argc;
@ -229,26 +216,19 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
*/
OBJ_CONSTRUCT(&daemons, opal_list_t);
/* Query the list of nodes allocated and mapped to this job.
/* Get the map for this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS
*/
OBJ_CONSTRUCT(&mapping, opal_list_t);
rc = orte_rmaps_base_get_map(jobid, &mapping);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
num_nodes = 0;
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
num_nodes = (orte_std_cntr_t)opal_list_get_size(&map->nodes);
/*
* Allocate a range of vpids for the daemons.
@ -353,10 +333,6 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
free(uri);
free(param);
opal_argv_append(&argc, &argv, "--mpi-call-yield");
call_yield_index = argc;
opal_argv_append(&argc, &argv, "0");
if (mca_pls_gridengine_component.debug) {
param = opal_argv_join(argv, ' ');
if (NULL != param) {
@ -368,44 +344,41 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
/* Figure out the basenames for the libdir and bindir. There is a
lengthy comment about this in pls_rsh_module.c explaining all
the rationale for how / why we're doing this. */
the rationale for how / why we're doing this.
*/
lib_base = opal_basename(OPAL_LIBDIR);
bin_base = opal_basename(OPAL_BINDIR);
/*
* Iterate through each of the contexts
/* See the note about prefix_dir in the orte/mca/pls/slurm/pls_slurm.c
* module. For here, just note that we must have at least one app_context,
* and we take the prefix_dir from that first one.
*/
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
char *prefix_dir = map->app->prefix_dir;
prefix_dir = map->apps[0]->prefix_dir;
/*
* For each of the contexts - iterate through the nodes.
* Iterate through the nodes.
*/
for(n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
orte_ras_node_t* ras_node = rmaps_node->node;
orte_mapped_node_t* rmaps_node = (orte_mapped_node_t*)n_item;
orte_process_name_t* name;
pid_t pid;
char *exec_path, *orted_path;
char **exec_argv;
#if 0
int remain_slot_cnt;
/* already launched on this node */
if(ras_node->node_launched++ != 0) {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: already launched on this node, %s",
ras_node->node_name);
}
continue;
}
/* query the registry for the remaining gridengine slot count on
/* RHC - I don't believe this code is really necessary any longer.
* The mapper correctly accounts for slots that have already been
* used. Even if another job starts to run between the time the
* mapper maps this job and we get to this point, the new job
* will have gone through the mapper and will not overuse the node.
* As this code consumes considerable time, I have sliced it out
* of the code for now.
*
* query the registry for the remaining gridengine slot count on
* this node, and update the registry for the count for the
* current process launch */
if (ORTE_SUCCESS != (rc =
@ -421,22 +394,23 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
true, ras_node->node_name, true);
exit(-1); /* exit instead of return ORTE_ERR_OUT_OF_RESOURCE */
}
#endif
/* setup node name */
free(argv[node_name_index1]);
if (NULL != ras_node->node_username &&
0 != strlen (ras_node->node_username)) {
if (NULL != rmaps_node->username &&
0 != strlen (rmaps_node->username)) {
asprintf(&argv[node_name_index1], "%s@%s",
ras_node->node_username, ras_node->node_name);
rmaps_node->username, rmaps_node->nodename);
} else {
argv[node_name_index1] = strdup(ras_node->node_name);
argv[node_name_index1] = strdup(rmaps_node->nodename);
}
free(argv[node_name_index2]);
argv[node_name_index2] = strdup(ras_node->node_name);
argv[node_name_index2] = strdup(rmaps_node->nodename);
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, ras_node->node_cellid, 0, vpid);
rc = orte_ns.create_process_name(&name, rmaps_node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -445,8 +419,8 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;
dmn->cell = ras_node->node_cellid;
dmn->nodename = strdup(ras_node->node_name);
dmn->cell = rmaps_node->cell;
dmn->nodename = strdup(rmaps_node->nodename);
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -474,27 +448,7 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: launching on node %s",
ras_node->node_name);
}
/* set the progress engine schedule for this node.
* if node_slots is set to zero, then we default to
* NOT being oversubscribed
*/
if (ras_node->node_slots > 0 &&
(orte_std_cntr_t)opal_list_get_size(&rmaps_node->node_procs) > ras_node->node_slots) {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
ras_node->node_slots, opal_list_get_size(&rmaps_node->node_procs));
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("1");
} else {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("0");
rmaps_node->nodename);
}
/* setting exec_argv and exec_path for qrsh */
@ -655,8 +609,6 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
opal_output(0, "pls:gridengine: execve failed with errno=%d\n", errno);
exit(-1);
} else { /* parent */
gridengine_daemon_info_t *daemon_info;
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: parent");
}
@ -664,36 +616,20 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
/* setup callback on sigchild - wait until setup above is complete
* as the callback can occur in the call to orte_wait_cb
*/
daemon_info = OBJ_NEW(gridengine_daemon_info_t);
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(daemon_info->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
}
daemon_info->nodename= strdup(ras_node->node_name);
orte_wait_cb(pid, orte_pls_gridengine_wait_daemon, daemon_info);
orte_wait_cb(pid, orte_pls_gridengine_wait_daemon, dmn);
vpid++;
}
free(name);
}
}
/* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
cleanup:
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&daemons);
if (NULL != lib_base) {
free(lib_base);
}
@ -707,6 +643,7 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
return rc;
}
#if 0
/**
* Query the registry for the gridengine slot count, and update it
*/
@ -808,6 +745,7 @@ static int update_slot_keyval(orte_ras_node_t* ras_node, int* slot_cnt)
return rc;
}
#endif
/**
* Query the registry for all nodes participating in the job

Просмотреть файл

@ -38,6 +38,7 @@
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/smr/smr.h"
@ -46,10 +47,7 @@
#include "orte/runtime/orte_wait.h"
/* remove for ORTE 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/rmgr/base/rmgr_private.h"
#include "orte/mca/pls/pls.h"
#include "orte/mca/pls/poe/pls_poe.h"
@ -338,33 +336,34 @@ poe_wait_job - call back when POE finish
*/
static void poe_wait_job(pid_t pid, int status, void* cbdata)
{
opal_list_t map;
opal_list_item_t* item;
orte_job_map_t *map;
opal_list_item_t *item, *item2;
int rc;
/* query allocation for the job */
OBJ_CONSTRUCT(&map, opal_list_t);
rc = orte_rmaps_base_get_map(mca_pls_poe_component.jobid,&map);
rc = orte_rmaps.get_job_map(&map, mca_pls_poe_component.jobid);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
for(item = opal_list_get_first(&map);
item != opal_list_get_end(&map);
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item;
orte_std_cntr_t i;
orte_mapped_node_t* node = (orte_mapped_node_t*) item;
for(i = 0 ; i < map->num_procs ; ++i) {
orte_session_dir_finalize(&(map->procs[i])->proc_name);
rc = orte_smr.set_proc_state(&(map->procs[i]->proc_name),
for (item2 = opal_list_get_first(&node->procs);
item2 != opal_list_get_end(&node->procs);
item2 = opal_list_get_next(item2)) {
orte_mapped_proc_t* proc = (orte_mapped_proc_t*)item2;
orte_session_dir_finalize(&(proc->name));
rc = orte_smr.set_proc_state(&(proc->name),
ORTE_PROC_STATE_ABORTED, status);
}
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
}
OBJ_DESTRUCT(&map);
}
}
/**
@ -379,7 +378,7 @@ poe_create_cmd_file - create POE command file
static int poe_create_cmd_file(
FILE *cfp,
orte_app_context_t* context,
orte_rmaps_base_proc_t* proc,
orte_mapped_proc_t* proc,
orte_vpid_t vpid_start,
orte_vpid_t vpid_range)
{
@ -428,7 +427,7 @@ static int poe_create_cmd_file(
free(uri);
/* push name into environment */
orte_ns_nds_env_put(&proc->proc_name, vpid_start, vpid_range, &environ_copy);
orte_ns_nds_env_put(&proc->name, vpid_start, vpid_range, &environ_copy);
if (context->argv == NULL) {
context->argv = malloc(sizeof(char*)*2);
@ -461,8 +460,8 @@ poe_launch_interactive - launch an interactive job
*/
static inline int poe_launch_interactive_job(orte_jobid_t jobid)
{
opal_list_t map, nodes, mapping_list;
opal_list_item_t* item;
orte_job_map_t *map;
opal_list_item_t *item, *item2;
orte_vpid_t vpid_start, vpid_range;
orte_std_cntr_t num_nodes, num_procs;
FILE *hfp, *cfp;
@ -479,12 +478,11 @@ static inline int poe_launch_interactive_job(orte_jobid_t jobid)
mca_pls_poe_component.jobid = jobid;
OBJ_CONSTRUCT(&nodes, opal_list_t);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
/* get the map for this job */
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
num_nodes = opal_list_get_size(&nodes);
num_nodes = opal_list_get_size(&map->nodes);
if(!strncmp(mca_pls_poe_component.resource_allocation,"hostfile",8)) {
@ -494,35 +492,32 @@ static inline int poe_launch_interactive_job(orte_jobid_t jobid)
(NULL==(hfp=fopen(mca_pls_poe_component.hostfile,"w"))) ) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
for(item = opal_list_get_first(&nodes);
item != opal_list_get_end(&nodes);
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
fprintf(hfp,"%s\n",node->node_name);
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
fprintf(hfp,"%s\n",node->nodename);
}
fclose(hfp);
}
rc = orte_rmgr_base_get_job_slots(jobid, &num_procs);
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
OBJ_CONSTRUCT(&map, opal_list_t);
rc = orte_rmaps_base_get_map(jobid,&map);
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
rc = orte_rmgr.get_vpid_range(jobid, &vpid_start, &vpid_range);
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
/* Create a temporary POE command file */
for(item = opal_list_get_first(&map);
item != opal_list_get_end(&map);
num_procs = 0;
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map2 = (orte_rmaps_base_map_t*)item;
orte_std_cntr_t i;
for(i=0; i<map2->num_procs; i++) {
rc = poe_create_cmd_file(cfp, map2->app, map2->procs[i], vpid_start, vpid_range);
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
for (item2 = opal_list_get_first(&node->procs);
item2 != opal_list_get_end(&node->procs);
item2 = opal_list_get_next(item2)) {
orte_mapped_proc_t* proc = (orte_mapped_proc_t*)item2;
rc = poe_create_cmd_file(cfp, map->apps[proc->app_idx], proc, vpid_start, vpid_range);
if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
num_procs++;
}
}
fclose(cfp);
@ -587,20 +582,6 @@ static inline int poe_launch_interactive_job(orte_jobid_t jobid)
cleanup:
while(NULL != (item = opal_list_remove_first(&map))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&map);
while(NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
while(NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);
return rc;
}

Просмотреть файл

@ -81,7 +81,7 @@
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/smr/smr.h"
#include "orte/mca/pls/pls.h"
@ -109,17 +109,6 @@ orte_pls_base_module_t orte_pls_rsh_module = {
orte_pls_rsh_finalize
};
/* struct used to have enough information to clean up the state of the
universe if a daemon aborts */
struct rsh_daemon_info_t {
opal_object_t super;
orte_ras_node_t* node;
orte_jobid_t jobid;
};
typedef struct rsh_daemon_info_t rsh_daemon_info_t;
static OBJ_CLASS_INSTANCE(rsh_daemon_info_t,
opal_object_t,
NULL, NULL);
static void set_handler_default(int sig);
enum {
@ -140,11 +129,15 @@ static const char * orte_pls_rsh_shell_name[] = {
"unknown"
};
/* local global storage of the list of active daemons */
opal_list_t active_daemons;
/**
* Check the Shell variable on the specified node
*/
static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell)
static int orte_pls_rsh_probe(orte_mapped_node_t * node, orte_pls_rsh_shell * shell)
{
char ** argv;
int argc, rc, nfds, i;
@ -156,7 +149,7 @@ static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: going to check SHELL variable on node %s\n",
node->node_name);
node->nodename);
}
*shell = ORTE_PLS_RSH_SHELL_UNKNOWN;
/*
@ -164,7 +157,7 @@ static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell
*/
argv = opal_argv_copy(mca_pls_rsh_component.agent_argv);
argc = mca_pls_rsh_component.agent_argc;
opal_argv_append(&argc, &argv, node->node_name);
opal_argv_append(&argc, &argv, node->nodename);
opal_argv_append(&argc, &argv, "echo $SHELL");
if (pipe(fd)) {
opal_output(0, "pls:rsh: pipe failed with errno=%d\n", errno);
@ -251,7 +244,7 @@ static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell
}
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: node:%s has SHELL:%s\n",
node->node_name, orte_pls_rsh_shell_name[*shell]);
node->nodename, orte_pls_rsh_shell_name[*shell]);
}
return rc;
}
@ -283,8 +276,9 @@ static int orte_pls_rsh_fill_exec_path ( char ** exec_path)
static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
{
rsh_daemon_info_t *info = (rsh_daemon_info_t*) cbdata;
opal_list_t map;
orte_pls_daemon_info_t *info = (orte_pls_daemon_info_t*) cbdata;
orte_mapped_node_t *node;
orte_mapped_proc_t *proc;
opal_list_item_t *item;
int rc;
@ -298,11 +292,8 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
*/
if (! WIFEXITED(status) || ! WEXITSTATUS(status) == 0) {
/* get the mapping for our node so we can cancel the right things */
OBJ_CONSTRUCT(&map, opal_list_t);
rc = orte_rmaps_base_get_node_map(orte_process_info.my_name->cellid,
info->jobid,
info->node->node_name,
&map);
rc = orte_rmaps.get_node_map(&node, info->cell,
info->nodename, info->active_job);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -310,33 +301,30 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
/* set state of all processes associated with the daemon as
terminated */
for(item = opal_list_get_first(&map);
item != opal_list_get_end(&map);
for(item = opal_list_get_first(&node->procs);
item != opal_list_get_end(&node->procs);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item;
orte_std_cntr_t i;
proc = (orte_mapped_proc_t*) item;
for (i = 0 ; i < map->num_procs ; ++i) {
/* Clean up the session directory as if we were the
process itself. This covers the case where the
process died abnormally and didn't cleanup its own
session directory. */
orte_session_dir_finalize(&(map->procs[i])->proc_name);
orte_session_dir_finalize(&(proc->name));
rc = orte_smr.set_proc_state(&(map->procs[i]->proc_name),
rc = orte_smr.set_proc_state(&(proc->name),
ORTE_PROC_STATE_ABORTED, status);
}
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
}
OBJ_DESTRUCT(&map);
OBJ_RELEASE(node);
cleanup:
/* tell the user something went wrong */
opal_output(0, "ERROR: A daemon on node %s failed to start as expected.",
info->node->node_name);
info->nodename);
opal_output(0, "ERROR: There may be more information available from");
opal_output(0, "ERROR: the remote shell (see above).");
@ -361,6 +349,15 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
/* release any waiting threads */
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
/* tell the system that this daemon is gone */
if (ORTE_SUCCESS != (rc = orte_pls_base_remove_daemon(info))) {
ORTE_ERROR_LOG(rc);
}
/* remove the daemon from our local list */
opal_list_remove_item(&active_daemons, &info->super);
OBJ_RELEASE(info);
if (mca_pls_rsh_component.num_children-- >=
mca_pls_rsh_component.num_concurrent ||
mca_pls_rsh_component.num_children == 0) {
@ -368,9 +365,6 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
}
OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
/* cleanup */
OBJ_RELEASE(info->node);
OBJ_RELEASE(info);
}
/**
@ -380,18 +374,19 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
int orte_pls_rsh_launch(orte_jobid_t jobid)
{
opal_list_t mapping;
opal_list_item_t* m_item, *n_item;
orte_job_map_t *map;
opal_list_item_t *n_item;
orte_mapped_node_t *rmaps_node;
orte_std_cntr_t num_nodes;
orte_vpid_t vpid;
int node_name_index1;
int node_name_index2;
int proc_name_index;
int local_exec_index, local_exec_index_end;
int call_yield_index;
char *jobid_string;
char *uri, *param;
char **argv, **tmp;
char *prefix_dir;
int argc;
int rc;
sigset_t sigs;
@ -399,33 +394,45 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
bool remote_bash = false, remote_csh = false;
bool local_bash = false, local_csh = false;
char *lib_base = NULL, *bin_base = NULL;
opal_list_t daemons;
orte_pls_daemon_info_t *dmn;
/* setup a list that will contain the info for all the daemons
* so we can store it on the registry when done
* so we can store it on the registry when done and use it
* locally to track their state
*/
OBJ_CONSTRUCT(&daemons, opal_list_t);
OBJ_CONSTRUCT(&active_daemons, opal_list_t);
/* Query the list of nodes allocated and mapped to this job.
/* Get the map for this job
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
* - need to know the nodes we are launching on
* All other mapping responsibilities fall to orted in the fork PLS
*/
OBJ_CONSTRUCT(&mapping, opal_list_t);
rc = orte_rmaps_base_get_map(jobid, &mapping);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
num_nodes = 0;
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
num_nodes = (orte_std_cntr_t)opal_list_get_size(&map->nodes);
/*
* After a discussion between Ralph & Jeff, we concluded that we
* really are handling the prefix dir option incorrectly. It currently
* is associated with an app_context, yet it really refers to the
* location where OpenRTE/Open MPI is installed on a NODE. Fixing
* this right now would involve significant change to orterun as well
* as elsewhere, so we will intentionally leave this incorrect at this
* point. The error, however, is identical to that seen in all prior
* releases of OpenRTE/Open MPI, so our behavior is no worse than before.
*
* A note to fix this, along with ideas on how to do so, has been filed
* on the project's Trac system under "feature enhancement".
*
* For now, default to the prefix_dir provided in the first app_context.
* Since there always MUST be at least one app_context, we are safe in
* doing this.
*/
prefix_dir = map->apps[0]->prefix_dir;
/*
* Allocate a range of vpids for the daemons.
@ -475,12 +482,8 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
}
} else {
orte_pls_rsh_shell shell;
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)opal_list_get_first(&mapping);
orte_rmaps_base_node_t* rmaps_node =
(orte_rmaps_base_node_t*)opal_list_get_first(&map->nodes);
orte_ras_node_t* node = rmaps_node->node;
rc = orte_pls_rsh_probe(node, &shell);
rmaps_node = (orte_mapped_node_t*)opal_list_get_first(&map->nodes);
rc = orte_pls_rsh_probe(rmaps_node, &shell);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
@ -582,10 +585,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
free(uri);
free(param);
opal_argv_append(&argc, &argv, "--mpi-call-yield");
call_yield_index = argc;
opal_argv_append(&argc, &argv, "0");
local_exec_index_end = argc;
if (!(remote_csh || remote_bash)) {
opal_argv_append(&argc, &argv, ")");
@ -633,60 +632,48 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
bin_base = opal_basename(OPAL_BINDIR);
/*
* Iterate through each of the contexts
*/
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
char * prefix_dir = map->app->prefix_dir;
/*
* For each of the contexts - iterate through the nodes.
* Iterate through each of the nodes
*/
for(n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
orte_ras_node_t* ras_node = rmaps_node->node;
orte_process_name_t* name;
pid_t pid;
char *exec_path;
char **exec_argv;
/* already launched on this node */
if(ras_node->node_launched++ != 0)
continue;
rmaps_node = (orte_mapped_node_t*)n_item;
/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
opal_list_append(&daemons, &dmn->super);
dmn->active_job = jobid;
opal_list_append(&active_daemons, &dmn->super);
/* setup node name */
free(argv[node_name_index1]);
if (NULL != ras_node->node_username &&
0 != strlen (ras_node->node_username)) {
if (NULL != rmaps_node->username &&
0 != strlen (rmaps_node->username)) {
asprintf (&argv[node_name_index1], "%s@%s",
ras_node->node_username, ras_node->node_name);
rmaps_node->username, rmaps_node->nodename);
} else {
argv[node_name_index1] = strdup(ras_node->node_name);
argv[node_name_index1] = strdup(rmaps_node->nodename);
}
free(argv[node_name_index2]);
argv[node_name_index2] = strdup(ras_node->node_name);
argv[node_name_index2] = strdup(rmaps_node->nodename);
/* save it in the daemon info */
dmn->nodename = strdup(ras_node->node_name);
dmn->nodename = strdup(rmaps_node->nodename);
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, ras_node->node_cellid, 0, vpid);
rc = orte_ns.create_process_name(&name, rmaps_node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* save it in the daemon info */
dmn->cell = ras_node->node_cellid;
dmn->cell = rmaps_node->cell;
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -715,28 +702,14 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: launching on node %s\n",
ras_node->node_name);
rmaps_node->nodename);
}
/* set the progress engine schedule for this node.
* if node_slots is set to zero, then we default to
* NOT being oversubscribed
/* We don't need to sense an oversubscribed condition and set the sched_yield
* for the node as we are only launching the daemons at this time. The daemons
* are now smart enough to set the oversubscribed condition themselves when
* they launch the local procs.
*/
if (ras_node->node_slots > 0 &&
(orte_std_cntr_t)opal_list_get_size(&rmaps_node->node_procs) > ras_node->node_slots) {
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
ras_node->node_slots, opal_list_get_size(&rmaps_node->node_procs));
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("1");
} else {
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("0");
}
/* Is this a local launch?
*
@ -746,11 +719,11 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
* current nodename, which must be local. If that doesn't
* match, check using ifislocal().
*/
if (0 == strcmp(ras_node->node_name, orte_system_info.nodename) ||
opal_ifislocal(ras_node->node_name)) {
if (0 == strcmp(rmaps_node->nodename, orte_system_info.nodename) ||
opal_ifislocal(rmaps_node->nodename)) {
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: %s is a LOCAL node\n",
ras_node->node_name);
rmaps_node->nodename);
}
exec_argv = &argv[local_exec_index];
exec_path = opal_path_findv(exec_argv[0], 0, environ, NULL);
@ -847,7 +820,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
} else {
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: %s is a REMOTE node\n",
ras_node->node_name);
rmaps_node->nodename);
}
exec_argv = argv;
exec_path = strdup(mca_pls_rsh_component.agent_path);
@ -951,8 +924,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
exit(-1);
} else { /* father */
rsh_daemon_info_t *daemon_info;
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
/* JJH Bug:
* If we are in '--debug-daemons' we keep the ssh connection
@ -974,11 +945,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
/* setup callback on sigchild - wait until setup above is complete
* as the callback can occur in the call to orte_wait_cb
*/
daemon_info = OBJ_NEW(rsh_daemon_info_t);
OBJ_RETAIN(ras_node);
daemon_info->node = ras_node;
daemon_info->jobid = jobid;
orte_wait_cb(pid, orte_pls_rsh_wait_daemon, daemon_info);
orte_wait_cb(pid, orte_pls_rsh_wait_daemon, dmn);
/* if required - add delay to avoid problems w/ X11 authentication */
if (mca_pls_rsh_component.debug && mca_pls_rsh_component.delay) {
@ -988,23 +955,14 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
}
free(name);
}
}
/* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&active_daemons))) {
ORTE_ERROR_LOG(rc);
}
cleanup:
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&daemons);
/* OBJ_RELEASE(map); */
if (NULL != lib_base) {
free(lib_base);

Просмотреть файл

@ -59,7 +59,7 @@
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/smr/smr.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/pls/pls.h"
#include "orte/mca/pls/base/pls_private.h"
@ -109,8 +109,8 @@ extern char **environ;
static int pls_slurm_launch_job(orte_jobid_t jobid)
{
opal_list_t nodes, mapping_list;
opal_list_item_t *item, *item2;
orte_job_map_t *map;
opal_list_item_t *item;
size_t num_nodes;
orte_vpid_t vpid;
char *jobid_string;
@ -137,15 +137,13 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
*/
OBJ_CONSTRUCT(&daemons, opal_list_t);
/* Query the list of nodes allocated and mapped to this job.
/* Query the map for this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS
*/
OBJ_CONSTRUCT(&nodes, opal_list_t);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
@ -153,7 +151,7 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
/*
* Allocate a range of vpids for the daemons.
*/
num_nodes = opal_list_get_size(&nodes);
num_nodes = opal_list_get_size(&map->nodes);
if (num_nodes == 0) {
return ORTE_ERR_BAD_PARAM;
}
@ -206,12 +204,12 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
nodelist_argv = NULL;
nodelist_argc = 0;
for (item = opal_list_get_first(&nodes);
item != opal_list_get_end(&nodes);
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
opal_argv_append(&nodelist_argc, &nodelist_argv, node->node_name);
opal_argv_append(&nodelist_argc, &nodelist_argv, node->nodename);
}
nodelist_flat = opal_argv_join(nodelist_argv, ',');
asprintf(&tmp, "--nodelist=%s", nodelist_flat);
@ -308,43 +306,15 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
}
}
/* Bookkeeping -- save the node names */
cur_prefix = NULL;
for (item = opal_list_get_first(&nodes);
item != opal_list_get_end(&nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
opal_list_t map;
size_t num_processes;
OBJ_CONSTRUCT(&map, opal_list_t);
/* Get the mapping of this very node */
rc = orte_rmaps_base_get_node_map(orte_process_info.my_name->cellid,
jobid,
node->node_name,
&map);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* Copy the prefix-directory specified within the
/* Copy the prefix-directory specified in the
corresponding app_context. If there are multiple,
different prefix's in the app context, complain (i.e., only
allow one --prefix option for the entire slurm run -- we
don't support different --prefix'es for different nodes in
the SLURM pls) */
num_processes = 0;
for (item2 = opal_list_get_first(&map);
item2 != opal_list_get_end(&map);
item2 = opal_list_get_next(item2)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item2;
char * app_prefix_dir = map->app->prefix_dir;
/* Increment the number of processes allocated to this node
* This allows us to accurately test for oversubscription */
num_processes += map->num_procs;
cur_prefix = NULL;
for (i=0; i < map->num_apps; i++) {
char * app_prefix_dir = map->apps[i]->prefix_dir;
/* Check for already set cur_prefix_dir -- if different,
complain */
if (NULL != app_prefix_dir) {
@ -358,7 +328,7 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
/* If not yet set, copy it; iff set, then it's the
same anyway */
if (NULL == cur_prefix) {
cur_prefix = strdup(map->app->prefix_dir);
cur_prefix = strdup(app_prefix_dir);
if (mca_pls_slurm_component.debug) {
opal_output (0, "pls:slurm: Set prefix:%s",
cur_prefix);
@ -367,21 +337,28 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
}
}
/* setup the daemon info for each node */
vpid = 0;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
/* record the daemons info for this node */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->cell = node->node_cellid;
dmn->nodename = strdup(node->node_name);
if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(dmn->name), node->node_cellid, 0, vpid))) {
dmn->active_job = jobid;
dmn->cell = node->cell;
dmn->nodename = strdup(node->nodename);
if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(dmn->name), dmn->cell, 0, vpid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
opal_list_append(&daemons, &dmn->super);
vpid++;
}
/* store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
@ -390,29 +367,6 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
var = mca_base_param_environ_variable("seed", NULL, NULL);
opal_setenv(var, "0", true, &env);
#if 0
/* JMS What to do for sched_yield? */
/* set the progress engine schedule for this node. if node_slots
is set to zero, then we default to NOT being oversubscribed */
if (node->node_slots > 0 &&
num_processes > node->node_slots) {
if (mca_pls_slurm_component.debug) {
opal_output(0, "pls:slurm: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
node->node_slots, num_processes);
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "1", true, &env);
} else {
if (mca_pls_slurm_component.debug) {
opal_output(0, "pls:slurm: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "0", true, &env);
}
free(var);
#endif
/* exec the daemon */
rc = pls_slurm_start_proc(argc, argv, env, cur_prefix);
if (ORTE_SUCCESS != rc) {
@ -424,16 +378,6 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
/* JMS: how do we catch when srun dies? */
cleanup:
while (NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
while (NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);
while (NULL != (item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(item);
}

Просмотреть файл

@ -58,12 +58,10 @@
#include "orte/mca/smr/smr.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/ns/ns.h"
/* needs to be cleaned up for ORTE 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/pls/base/pls_private.h"
#include "pls_tm.h"
@ -118,14 +116,16 @@ extern char **environ;
static int pls_tm_launch_job(orte_jobid_t jobid)
{
opal_list_t mapping;
opal_list_item_t *m_item, *n_item;
orte_job_map_t *map;
opal_list_item_t *item;
size_t num_nodes;
orte_vpid_t vpid;
int node_name_index;
int proc_name_index;
char *jobid_string;
char *uri, *param;
char **env;
char *var;
char **argv;
int argc;
int rc;
@ -139,24 +139,17 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
opal_list_t daemons;
orte_pls_daemon_info_t *dmn;
/* Query the list of nodes allocated and mapped to this job.
/* Query the map for this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
*/
OBJ_CONSTRUCT(&mapping, opal_list_t);
rc = orte_rmaps_base_get_map(jobid, &mapping);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
num_nodes = 0;
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
num_nodes = opal_list_get_size(&map->nodes);
/*
* Allocate a range of vpids for the daemons.
@ -286,31 +279,25 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
lib_base = opal_basename(OPAL_LIBDIR);
bin_base = opal_basename(OPAL_BINDIR);
/*
* iterate through each of the contexts
*/
for (m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
char** env;
char* var;
/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. */
if (NULL != map->app->prefix_dir) {
LD_LIBRARY_PATH environment variables. We only allow
a single prefix to be specified. Since there will
always be at least one app_context, we take it from
there
*/
if (NULL != map->apps[0]->prefix_dir) {
char *newenv;
for (i = 0; NULL != env && NULL != env[i]; ++i) {
/* Reset PATH */
if (0 == strncmp("PATH=", env[i], 5)) {
asprintf(&newenv, "%s/%s:%s",
map->app->prefix_dir, bin_base, env[i] + 5);
map->apps[0]->prefix_dir, bin_base, env[i] + 5);
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: resetting PATH: %s",
newenv);
@ -322,7 +309,7 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
/* Reset LD_LIBRARY_PATH */
else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) {
asprintf(&newenv, "%s/%s:%s",
map->app->prefix_dir, lib_base, env[i] + 16);
map->apps[0]->prefix_dir, lib_base, env[i] + 16);
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: resetting LD_LIBRARY_PATH: %s",
newenv);
@ -347,19 +334,13 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
/* Iterate through each of the nodes and spin
* up a daemon.
*/
for (n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
orte_ras_node_t* node = rmaps_node->node;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(n_item)) {
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
orte_process_name_t* name;
char* name_string;
/* already launched on this node */
if (0 != node->node_launched++) {
continue;
}
/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;
@ -367,14 +348,14 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
/* setup node name */
free(argv[node_name_index]);
argv[node_name_index] = strdup(node->node_name);
argv[node_name_index] = strdup(node->nodename);
/* record the node name in the daemon struct */
dmn->cell = node->node_cellid;
dmn->nodename = strdup(node->node_name);
dmn->cell = node->cell;
dmn->nodename = strdup(node->nodename);
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
rc = orte_ns.create_process_name(&name, node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -390,7 +371,7 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
if (mca_pls_tm_component.debug ||
mca_pls_tm_component.verbose) {
opal_output(0, "pls:tm: launching on node %s",
node->node_name);
node->nodename);
}
/* setup process name */
@ -402,28 +383,6 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
free(argv[proc_name_index]);
argv[proc_name_index] = strdup(name_string);
/* set the progress engine schedule for this node.
* if node_slots is set to zero, then we default to
* NOT being oversubscribed
*/
if (node->node_slots > 0 &&
(orte_std_cntr_t)opal_list_get_size(&rmaps_node->node_procs) > node->node_slots) {
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
node->node_slots,
opal_list_get_size(&rmaps_node->node_procs));
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "1", true, &env);
} else {
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "0", true, &env);
}
free(var);
/* exec the daemon */
if (mca_pls_tm_component.debug) {
param = opal_argv_join(argv, ' ');
@ -433,7 +392,7 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
}
}
rc = pls_tm_start_proc(node->node_name, argc, argv, env,
rc = pls_tm_start_proc(node->nodename, argc, argv, env,
tm_task_ids + launched,
tm_events + launched);
if (ORTE_SUCCESS != rc) {
@ -447,13 +406,12 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
/* Allow some progress to occur */
opal_event_loop(OPAL_EVLOOP_NONBLOCK);
}
}
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm:launch: finished spawning orteds\n");
}
/* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
@ -478,10 +436,6 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
free(tm_task_ids);
}
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
if (NULL != lib_base) {
free(lib_base);
}
@ -490,8 +444,8 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
}
/* deconstruct the daemon list */
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);
while (NULL != (item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&daemons);

Просмотреть файл

@ -30,7 +30,7 @@
#import "orte/mca/pls/pls.h"
#import "orte/mca/errmgr/errmgr.h"
#import "orte/mca/ras/ras_types.h"
#import "orte/mca/rmaps/base/rmaps_private.h"
#import "orte/mca/rmaps/rmaps.h"
#import "orte/mca/smr/smr.h"
#import "pls_xgrid_client.h"
@ -229,8 +229,8 @@ char **environ;
-(int) launchJob:(orte_jobid_t) jobid
{
opal_list_t mapping;
opal_list_item_t *m_item, *n_item;
orte_job_map_t *map;
opal_list_item_t *item;
size_t num_nodes;
orte_vpid_t vpid;
int rc, i = 0;
@ -239,24 +239,17 @@ char **environ;
char *orted_path;
char *nsuri = NULL, *gpruri = NULL;
/* Query the list of nodes allocated and mapped to this job.
/* Query the map for this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
*/
OBJ_CONSTRUCT(&mapping, opal_list_t);
rc = orte_rmaps_base_get_map(jobid, &mapping);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
num_nodes = 0;
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
num_nodes = opal_list_get_size(&map->nodes);
/*
* Allocate a range of vpids for the daemons.
@ -300,41 +293,27 @@ char **environ;
/* build up the array of task specifications */
NSMutableDictionary *taskSpecifications = [NSMutableDictionary dictionary];
/*
* iterate through each of the contexts
*/
for (m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
/* Iterate through each of the nodes and spin
* up a daemon.
*/
for (n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
orte_ras_node_t* node = rmaps_node->node;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(n_item)) {
orte_mapped_node_t* rmaps_node = (orte_mapped_node_t*)item;
orte_process_name_t* name;
char* name_string;
/* already launched on this node */
if (0 != node->node_launched++) {
continue;
}
/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;
opal_list_append(&daemons, &dmn->super);
/* record the node name in the daemon struct */
dmn->cell = node->node_cellid;
dmn->nodename = strdup(node->node_name);
dmn->cell = node->cell;
dmn->nodename = strdup(node->nodename);
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
rc = orte_ns.create_process_name(&name, node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -349,7 +328,7 @@ char **environ;
/* setup per-node options */
opal_output_verbose(1, orte_pls_base.pls_output,
"orte:pls:xgrid: launching on node %s",
node->node_name);
node->nodename);
/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
@ -367,7 +346,7 @@ char **environ;
@"--bootproxy", [NSString stringWithFormat: @"%d", jobid],
@"--name", [NSString stringWithCString: name_string],
@"--num_procs", [NSString stringWithFormat: @"%d", 1],
@"--nodename", [NSString stringWithCString: node->node_name],
@"--nodename", [NSString stringWithCString: node->nodename],
@"--nsreplica", [NSString stringWithCString: nsuri],
@"--gprreplica", [NSString stringWithCString: gpruri],
nil];
@ -378,7 +357,6 @@ char **environ;
vpid++; i++;
}
}
/* job specification */
NSMutableDictionary *jobSpecification = [NSMutableDictionary dictionary];
@ -419,7 +397,7 @@ char **environ;
forKey: [NSString stringWithFormat: @"%d", jobid]];
/* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
@ -427,11 +405,6 @@ cleanup:
if (NULL != nsuri) free(nsuri);
if (NULL != gpruri) free(gpruri);
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
/* deconstruct the daemon list */
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);

Просмотреть файл

@ -20,14 +20,14 @@ dist_pkgdata_DATA = base/help-orte-rmaps-base.txt
headers += \
base/base.h \
base/rmaps_class_instances.h \
base/rmaps_private.h
libmca_rmaps_la_SOURCES += \
base/rmaps_base_close.c \
base/rmaps_base_map.c \
base/rmaps_base_registry_fns.c \
base/rmaps_base_map_job.c \
base/rmaps_base_node.c \
base/rmaps_base_no_ops.c \
base/rmaps_base_support_fns.c \
base/rmaps_base_open.c \
base/rmaps_base_receive.c \
base/rmaps_base_find_avail.c \

Просмотреть файл

@ -29,14 +29,14 @@
/*
* JOB_MAP
*/
int orte_rmaps_base_compare_map(orte_rmaps_base_map_t *value1, orte_rmaps_base_map_t *value2, orte_data_type_t type)
int orte_rmaps_base_compare_map(orte_job_map_t *value1, orte_job_map_t *value2, orte_data_type_t type)
{
return ORTE_EQUAL;
}
/* MAPPED_PROC */
int orte_rmaps_base_compare_mapped_proc(orte_rmaps_base_proc_t *value1, orte_rmaps_base_proc_t *value2, orte_data_type_t type)
int orte_rmaps_base_compare_mapped_proc(orte_mapped_proc_t *value1, orte_mapped_proc_t *value2, orte_data_type_t type)
{
return ORTE_EQUAL;
}
@ -44,7 +44,7 @@ int orte_rmaps_base_compare_mapped_proc(orte_rmaps_base_proc_t *value1, orte_rma
/* MAPPED_NODE */
int orte_rmaps_base_compare_mapped_node(orte_rmaps_base_node_t *value1, orte_rmaps_base_node_t *value2, orte_data_type_t type)
int orte_rmaps_base_compare_mapped_node(orte_mapped_node_t *value1, orte_mapped_node_t *value2, orte_data_type_t type)
{
return ORTE_EQUAL;
}

Просмотреть файл

@ -34,12 +34,12 @@
/*
* JOB_MAP
*/
int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t *src, orte_data_type_t type)
int orte_rmaps_base_copy_map(orte_job_map_t **dest, orte_job_map_t *src, orte_data_type_t type)
{
orte_std_cntr_t i;
int rc;
opal_list_item_t *item;
orte_rmaps_base_node_t *srcnode, *nodeptr;
orte_mapped_node_t *srcnode, *nodeptr;
if (NULL == src) {
*dest = NULL;
@ -47,34 +47,34 @@ int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t
}
/* create the new object */
*dest = OBJ_NEW(orte_rmaps_base_map_t);
*dest = OBJ_NEW(orte_job_map_t);
if (NULL == *dest) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* copy data into it */
(*dest)->app = src->app;
(*dest)->job = src->job;
(*dest)->num_apps = src->num_apps;
(*dest)->procs = (orte_rmaps_base_proc_t**)malloc(src->num_procs * sizeof(orte_rmaps_base_proc_t));
if (NULL == (*dest)->procs) {
(*dest)->apps = (orte_app_context_t**)malloc(src->num_apps * sizeof(orte_app_context_t*));
if (NULL == (*dest)->apps) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(*dest);
return ORTE_ERR_OUT_OF_RESOURCE;
}
for (i=0; i < src->num_procs; i++) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_proc(&((*dest)->procs[i]), src->procs[i], ORTE_MAPPED_PROC))) {
for (i=0; i < src->num_apps; i++) {
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((*dest)->apps[i]), src->apps[i], ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest);
return rc;
}
}
(*dest)->num_procs = src->num_procs;
for (item = opal_list_get_first(&(src->nodes));
item != opal_list_get_end(&(src->nodes));
item = opal_list_get_next(item)) {
srcnode = (orte_rmaps_base_node_t*)item;
srcnode = (orte_mapped_node_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_node(&nodeptr, srcnode, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest);
@ -89,52 +89,40 @@ int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t
/*
* MAPPED_PROC
*/
int orte_rmaps_base_copy_mapped_proc(orte_rmaps_base_proc_t **dest, orte_rmaps_base_proc_t *src, orte_data_type_t type)
int orte_rmaps_base_copy_mapped_proc(orte_mapped_proc_t **dest, orte_mapped_proc_t *src, orte_data_type_t type)
{
int rc;
if (NULL == src) {
*dest = NULL;
return ORTE_SUCCESS;
}
/* create the new object */
*dest = OBJ_NEW(orte_rmaps_base_proc_t);
*dest = OBJ_NEW(orte_mapped_proc_t);
if (NULL == *dest) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* copy data into it */
if (NULL != src->app) {
(*dest)->app = strdup(src->app);
}
(*dest)->name = src->name;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_node(&((*dest)->proc_node), src->proc_node, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest);
return rc;
}
(*dest)->rank = src->rank;
(*dest)->proc_name = src->proc_name;
(*dest)->proc_rank = src->proc_rank;
(*dest)->app_idx = src->app_idx;
(*dest)->pid = src->pid;
(*dest)->local_pid = src->local_pid;
return ORTE_SUCCESS;
}
/*
* MAPPED_NODE
*/
int orte_rmaps_base_copy_mapped_node(orte_rmaps_base_node_t **dest, orte_rmaps_base_node_t *src, orte_data_type_t type)
int orte_rmaps_base_copy_mapped_node(orte_mapped_node_t **dest, orte_mapped_node_t *src, orte_data_type_t type)
{
int rc;
opal_list_item_t *item;
orte_rmaps_base_proc_t *srcproc, *procptr;
orte_mapped_proc_t *srcproc, *procptr;
if (NULL == src) {
*dest = NULL;
@ -142,29 +130,43 @@ int orte_rmaps_base_copy_mapped_node(orte_rmaps_base_node_t **dest, orte_rmaps_b
}
/* create the new object */
*dest = OBJ_NEW(orte_rmaps_base_node_t);
*dest = OBJ_NEW(orte_mapped_node_t);
if (NULL == *dest) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* copy data into it */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((*dest)->node), src->node, ORTE_RAS_NODE))) {
(*dest)->cell = src->cell;
if (NULL != src->nodename) {
(*dest)->nodename = strdup(src->nodename);
}
if (NULL != src->username) {
(*dest)->username = strdup(src->username);
}
if (NULL != src->daemon) {
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((*dest)->daemon), src->daemon, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest);
return rc;
}
}
for (item = opal_list_get_first(&(src->node_procs));
item != opal_list_get_end(&(src->node_procs));
(*dest)->oversubscribed = src->oversubscribed;
for (item = opal_list_get_first(&(src->procs));
item != opal_list_get_end(&(src->procs));
item = opal_list_get_next(item)) {
srcproc = (orte_rmaps_base_proc_t*)item;
srcproc = (orte_mapped_proc_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_proc(&procptr, srcproc, ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest);
return rc;
}
opal_list_append(&((*dest)->node_procs), &procptr->super);
opal_list_append(&((*dest)->procs), &procptr->super);
}
return ORTE_SUCCESS;

Просмотреть файл

@ -38,29 +38,28 @@ int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
{
int rc;
orte_std_cntr_t i, num_nodes;
orte_rmaps_base_map_t **maps;
orte_job_map_t **maps;
opal_list_item_t *item;
orte_rmaps_base_node_t *srcnode;
orte_mapped_node_t *srcnode;
/* array of pointers to orte_rmaps_base_map_t objects - need to pack the objects a set of fields at a time */
maps = (orte_rmaps_base_map_t**) src;
/* array of pointers to orte_job_map_t objects - need to pack the objects a set of fields at a time */
maps = (orte_job_map_t**) src;
for (i=0; i < num_vals; i++) {
/* pack the app_context */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, maps[i]->app, 1, ORTE_APP_CONTEXT))) {
/* pack the jobid this map is for */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->job), 1, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the number of procs */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->num_procs), 1, ORTE_STD_CNTR))) {
/* pack the number of app_contexts */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->num_apps), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the procs array */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(maps[i]->procs),
maps[i]->num_procs, ORTE_MAPPED_PROC))) {
/* pack the app_contexts */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, maps[i]->apps, maps[i]->num_apps, ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -77,7 +76,7 @@ int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
for (item = opal_list_get_first(&(maps[i]->nodes));
item != opal_list_get_end(&(maps[i]->nodes));
item = opal_list_get_next(item)) {
srcnode = (orte_rmaps_base_node_t*)item;
srcnode = (orte_mapped_node_t*)item;
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)srcnode,
1, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
@ -99,45 +98,33 @@ int orte_rmaps_base_pack_mapped_proc(orte_buffer_t *buffer, void *src,
{
int rc;
orte_std_cntr_t i;
orte_rmaps_base_proc_t **procs;
orte_mapped_proc_t **procs;
/* array of pointers to orte_rmaps_base_proc_t objects - need to pack the objects a set of fields at a time */
procs = (orte_rmaps_base_proc_t**) src;
/* array of pointers to orte_mapped_proc_t objects - need to pack the objects a set of fields at a time */
procs = (orte_mapped_proc_t**) src;
for (i=0; i < num_vals; i++) {
/* pack the app */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, procs[i]->app, 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the proc_node */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, procs[i]->proc_node, 1, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the proc name */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(&(procs[i]->proc_name)),
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(&(procs[i]->name)),
1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the rank */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->proc_rank), 1, ORTE_STD_CNTR))) {
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->rank), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the pls-pid */
/* pack the pid */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->pid), 1, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the local pid */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->local_pid), 1, ORTE_PID))) {
/* pack the app_idx */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->app_idx), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -155,22 +142,46 @@ int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
{
int rc;
orte_std_cntr_t i, num_procs;
orte_rmaps_base_node_t **nodes;
orte_mapped_node_t **nodes;
opal_list_item_t *item;
orte_rmaps_base_proc_t *srcproc;
orte_mapped_proc_t *srcproc;
/* array of pointers to orte_rmaps_base_node_t objects - need to pack the objects a set of fields at a time */
nodes = (orte_rmaps_base_node_t**) src;
/* array of pointers to orte_mapped_node_t objects - need to pack the objects a set of fields at a time */
nodes = (orte_mapped_node_t**) src;
for (i=0; i < num_vals; i++) {
/* pack the node object */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, nodes[i]->node, 1, ORTE_RAS_NODE))) {
/* pack the cellid */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->cell), 1, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the nodename */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->nodename), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the username */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->username), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the daemon's name */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->daemon), 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the oversubscribed flag */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->oversubscribed), 1, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the number of procs */
num_procs = (orte_std_cntr_t)opal_list_get_size(&(nodes[i]->node_procs));
num_procs = (orte_std_cntr_t)opal_list_get_size(&(nodes[i]->procs));
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &num_procs, 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -178,10 +189,10 @@ int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
/* pack the procs list */
if (0 < num_procs) {
for (item = opal_list_get_first(&(nodes[i]->node_procs));
item != opal_list_get_end(&(nodes[i]->node_procs));
for (item = opal_list_get_first(&(nodes[i]->procs));
item != opal_list_get_end(&(nodes[i]->procs));
item = opal_list_get_next(item)) {
srcproc = (orte_rmaps_base_proc_t*)item;
srcproc = (orte_mapped_proc_t*)item;
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)srcproc,
1, ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -34,10 +34,10 @@
/*
* JOB_MAP
*/
int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t *src, orte_data_type_t type)
int orte_rmaps_base_print_map(char **output, char *prefix, orte_job_map_t *src, orte_data_type_t type)
{
char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
orte_rmaps_base_node_t *srcnode;
orte_mapped_node_t *srcnode;
orte_std_cntr_t i, num_nodes;
opal_list_item_t *item;
int rc;
@ -52,32 +52,22 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "%sMap for app_context:", pfx2);
asprintf(&tmp, "%sMap for job: %ld\tNum app_contexts: %ld", pfx2, (long)src->job, (long)src->num_apps);
asprintf(&pfx, "%s\t", pfx2);
free(pfx2);
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->app, ORTE_APP_CONTEXT))) {
for (i=0; i < src->num_apps; i++) {
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->apps[i], ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc);
free(pfx);
free(tmp);
return rc;
}
asprintf(&tmp3, "%s\n%s\n%sNum elements in procs array: %ld", tmp, tmp2, pfx, (long)src->num_procs);
asprintf(&tmp3, "%s\n%s", tmp, tmp2);
free(tmp);
free(tmp2);
for (i=0; i < src->num_procs; i++) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_proc(&tmp, pfx, src->procs[i], ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc);
free(pfx);
free(tmp3);
return rc;
}
asprintf(&tmp2, "%s\n%s", tmp3, tmp);
free(tmp);
free(tmp3);
tmp3 = tmp2;
tmp = tmp3;
}
num_nodes = (orte_std_cntr_t)opal_list_get_size(&(src->nodes));
@ -86,7 +76,7 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t
for (item = opal_list_get_first(&(src->nodes));
item != opal_list_get_end(&(src->nodes));
item = opal_list_get_next(item)) {
srcnode = (orte_rmaps_base_node_t*)item;
srcnode = (orte_mapped_node_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_node(&tmp2, pfx, srcnode, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
free(pfx);
@ -110,7 +100,7 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t
/*
* MAPPED_PROC
*/
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_base_proc_t *src, orte_data_type_t type)
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_mapped_proc_t *src, orte_data_type_t type)
{
char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
int rc;
@ -125,35 +115,18 @@ int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_ba
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "%sMapped proc:", pfx2);
asprintf(&tmp3, "%sMapped proc:\n%s\tProc Name:", pfx2, pfx2);
asprintf(&pfx, "%s\t", pfx2);
if (NULL != src->app) {
asprintf(&tmp2, "%s\n%sApp name: %s", tmp, pfx, src->app);
} else {
asprintf(&tmp2, "%s\n%sApplication has NULL name", tmp, pfx);
}
free(tmp);
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_node(&tmp, pfx, src->proc_node, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
free(pfx);
free(tmp2);
return rc;
}
asprintf(&tmp3, "%s\n%s\n%s\n%sProc Name:", tmp2, pfx, tmp, pfx);
free(tmp2);
free(tmp);
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, &(src->proc_name), ORTE_NAME))) {
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, &(src->name), ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
free(pfx);
free(tmp3);
return rc;
}
asprintf(&tmp, "%s\n%s\n%sProc Rank: %ld\tPLS pid: %ld\tLocal PID: %ld\n", tmp3, tmp2, pfx,
(long)src->proc_rank, (long)src->pid, (long)src->local_pid);
asprintf(&tmp, "%s\n%s\n%sProc Rank: %ld\tProc PID: %ld\tApp_context index: %ld\n", tmp3, tmp2, pfx,
(long)src->rank, (long)src->pid, (long)src->app_idx);
free(tmp2);
free(tmp3);
@ -168,15 +141,13 @@ int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_ba
/*
* MAPPED_NODE
*/
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_base_node_t *src, orte_data_type_t type)
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_node_t *src, orte_data_type_t type)
{
int rc;
char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
orte_std_cntr_t num_procs;
#if 0
opal_list_item_t *item;
orte_rmaps_base_proc_t *srcproc;
#endif
orte_mapped_proc_t *srcproc;
/* set default result */
*output = NULL;
@ -188,27 +159,30 @@ int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_ba
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "%sMapped node:", pfx2);
asprintf(&tmp, "%sMapped node:\n%s\tCell: %ld\tNodename: %s\tUsername: %s\n%s\tDaemon name:", pfx2, pfx2,
(long)src->cell, (NULL == src->nodename ? "NULL" : src->nodename),
(NULL == src->username ? "NULL" : src->username), pfx2);
asprintf(&pfx, "%s\t", pfx2);
free(pfx2);
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->node, ORTE_RAS_NODE))) {
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->daemon, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
free(pfx);
free(tmp);
return rc;
}
num_procs = (orte_std_cntr_t)opal_list_get_size(&(src->node_procs));
asprintf(&tmp3, "%s\n%s\n%sNum elements in procs list: %ld", tmp, tmp2, pfx, (long)num_procs);
num_procs = (orte_std_cntr_t)opal_list_get_size(&(src->procs));
asprintf(&tmp3, "%s\n\t%s\n%sOversubscribed: %s\tNum elements in procs list: %ld", tmp, tmp2, pfx,
(src->oversubscribed ? "True" : "False"), (long)num_procs);
free(tmp);
free(tmp2);
#if 0
for (item = opal_list_get_first(&(src->node_procs));
item != opal_list_get_end(&(src->node_procs));
for (item = opal_list_get_first(&(src->procs));
item != opal_list_get_end(&(src->procs));
item = opal_list_get_next(item)) {
srcproc = (orte_rmaps_base_proc_t*)item;
srcproc = (orte_mapped_proc_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_proc(&tmp2, pfx, srcproc, ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc);
free(pfx);
@ -220,7 +194,7 @@ int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_ba
free(tmp2);
tmp3 = tmp;
}
#endif
/* set the return */
*output = tmp3;

Просмотреть файл

@ -32,10 +32,10 @@
/*
* JOB_MAP
*/
int orte_rmaps_base_size_map(size_t *size, orte_rmaps_base_map_t *src, orte_data_type_t type)
int orte_rmaps_base_size_map(size_t *size, orte_job_map_t *src, orte_data_type_t type)
{
/* account for the object itself */
*size = sizeof(orte_rmaps_base_map_t);
*size = sizeof(orte_job_map_t);
/* if src is NULL, then that's all we wanted */
if (NULL == src) return ORTE_SUCCESS;
@ -46,10 +46,10 @@ int orte_rmaps_base_size_map(size_t *size, orte_rmaps_base_map_t *src, orte_data
/*
* MAPPED_PROC
*/
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_rmaps_base_proc_t *src, orte_data_type_t type)
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_mapped_proc_t *src, orte_data_type_t type)
{
/* account for the object itself */
*size = sizeof(orte_rmaps_base_proc_t);
*size = sizeof(orte_mapped_proc_t);
/* if src is NULL, then that's all we wanted */
if (NULL == src) return ORTE_SUCCESS;
@ -60,10 +60,10 @@ int orte_rmaps_base_size_mapped_proc(size_t *size, orte_rmaps_base_proc_t *src,
/*
* MAPPED_NODE
*/
int orte_rmaps_base_size_mapped_node(size_t *size, orte_rmaps_base_node_t *src, orte_data_type_t type)
int orte_rmaps_base_size_mapped_node(size_t *size, orte_mapped_node_t *src, orte_data_type_t type)
{
/* account for the object itself */
*size = sizeof(orte_rmaps_base_node_t);
*size = sizeof(orte_mapped_node_t);
/* if src is NULL, then that's all we wanted */
if (NULL == src) return ORTE_SUCCESS;

Просмотреть файл

@ -40,49 +40,48 @@ int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest,
{
int rc;
orte_std_cntr_t i, j, n, num_nodes;
orte_rmaps_base_map_t **maps;
orte_rmaps_base_node_t *node;
orte_job_map_t **maps;
orte_mapped_node_t *node;
/* unpack into array of orte_rmaps_base_map_t objects */
maps = (orte_rmaps_base_map_t**) dest;
/* unpack into array of orte_job_map_t objects */
maps = (orte_job_map_t**) dest;
for (i=0; i < *num_vals; i++) {
/* create the orte_rmaps_base_map_t object */
maps[i] = OBJ_NEW(orte_rmaps_base_map_t);
maps[i] = OBJ_NEW(orte_job_map_t);
if (NULL == maps[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* unpack the app_context */
/* unpack the jobid */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->app), &n, ORTE_APP_CONTEXT))) {
&(maps[i]->job), &n, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the number of procs */
/* unpack the number of app_contexts */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->num_procs), &n, ORTE_STD_CNTR))) {
&(maps[i]->num_apps), &n, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if we have some, allocate space for them */
if (0 < maps[i]->num_procs) {
maps[i]->procs = (orte_rmaps_base_proc_t**)malloc(maps[i]->num_procs * sizeof(orte_rmaps_base_proc_t*));
if (NULL == maps[i]->procs) {
/* allocate space for them */
maps[i]->apps = (orte_app_context_t**)malloc(maps[i]->num_apps * sizeof(orte_app_context_t*));
if (NULL == maps[i]->apps) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* and unpack them */
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, maps[i]->procs, &(maps[i]->num_procs), ORTE_MAPPED_PROC))) {
/* unpack the app_context */
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->apps), &(maps[i]->num_apps), ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
/* unpack the number of nodes */
n = 1;
@ -112,39 +111,23 @@ int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
{
int rc;
orte_std_cntr_t i, n;
orte_rmaps_base_proc_t **procs;
orte_mapped_proc_t **procs;
/* unpack into array of orte_rmaps_base_proc_t objects */
procs = (orte_rmaps_base_proc_t**) dest;
/* unpack into array of orte_mapped_proc_t objects */
procs = (orte_mapped_proc_t**) dest;
for (i=0; i < *num_vals; i++) {
/* create the orte_rmaps_base_proc_t object */
procs[i] = OBJ_NEW(orte_rmaps_base_proc_t);
/* create the orte_mapped_proc_t object */
procs[i] = OBJ_NEW(orte_mapped_proc_t);
if (NULL == procs[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* unpack the app name */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->app), &n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the proc_node */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->proc_node), &n, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the proc name */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->proc_name), &n, ORTE_NAME))) {
&(procs[i]->name), &n, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -152,12 +135,12 @@ int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
/* unpack the rank */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->proc_rank), &n, ORTE_STD_CNTR))) {
&(procs[i]->rank), &n, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the pls-pid */
/* unpack the pid */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->pid), &n, ORTE_PID))) {
@ -165,10 +148,10 @@ int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
return rc;
}
/* unpack the local pid */
/* unpack the app_idx */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->local_pid), &n, ORTE_PID))) {
&(procs[i]->app_idx), &n, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -185,24 +168,56 @@ int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
{
int rc;
orte_std_cntr_t i, j, n, num_procs;
orte_rmaps_base_node_t **nodes;
orte_rmaps_base_proc_t *srcproc;
orte_mapped_node_t **nodes;
orte_mapped_proc_t *srcproc;
/* unpack into array of orte_rmaps_base_node_t objects */
nodes = (orte_rmaps_base_node_t**) dest;
/* unpack into array of orte_mapped_node_t objects */
nodes = (orte_mapped_node_t**) dest;
for (i=0; i < *num_vals; i++) {
/* create the orte_rmaps_base_node_t object */
nodes[i] = OBJ_NEW(orte_rmaps_base_node_t);
nodes[i] = OBJ_NEW(orte_mapped_node_t);
if (NULL == nodes[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* unpack the node object */
/* unpack the cellid */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->node), &n, ORTE_RAS_NODE))) {
&(nodes[i]->cell), &n, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the nodename */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->nodename), &n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the username */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->username), &n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the daemon's name */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->daemon), &n, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the oversubscribed flag */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->oversubscribed), &n, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -222,7 +237,7 @@ int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc);
return rc;
}
opal_list_append(&(nodes[i]->node_procs), &srcproc->super);
opal_list_append(&(nodes[i]->procs), &srcproc->super);
}
}
}

Просмотреть файл

@ -1,903 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/util/output.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/schema/schema.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/smr/smr_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"
/**
 * orte_rmaps_base_node_t — one node within a map, plus the procs
 * placed on it.
 */
static void orte_rmaps_base_node_construct(orte_rmaps_base_node_t* node)
{
    /* start with no RAS node attached and an empty proc list */
    OBJ_CONSTRUCT(&node->node_procs, opal_list_t);
    node->node = NULL;
}

static void orte_rmaps_base_node_destruct(orte_rmaps_base_node_t* node)
{
    opal_list_item_t* entry;

    /* drop our reference on the RAS node, if one was attached */
    if (node->node != NULL) {
        OBJ_RELEASE(node->node);
        node->node = NULL;
    }
    /* release every proc still queued on this node, then the list itself */
    for (entry = opal_list_remove_first(&node->node_procs);
         entry != NULL;
         entry = opal_list_remove_first(&node->node_procs)) {
        OBJ_RELEASE(entry);
    }
    OBJ_DESTRUCT(&node->node_procs);
}

OBJ_CLASS_INSTANCE(
    orte_rmaps_base_node_t,
    opal_list_item_t,
    orte_rmaps_base_node_construct,
    orte_rmaps_base_node_destruct);
/**
 * orte_rmaps_base_proc_t — one mapped process.
 */
static void orte_rmaps_base_proc_construct(orte_rmaps_base_proc_t* proc)
{
    /* zero every field so the destructor is always safe to run */
    proc->app = NULL;
    proc->proc_node = NULL;
    proc->pid = 0;
    proc->local_pid = 0;
}

static void orte_rmaps_base_proc_destruct(orte_rmaps_base_proc_t* proc)
{
    /* the app name string is owned by this object (strdup'd when the
     * proc was built); free(NULL) is a harmless no-op */
    free(proc->app);
    proc->app = NULL;
}

OBJ_CLASS_INSTANCE(
    orte_rmaps_base_proc_t,
    opal_list_item_t,
    orte_rmaps_base_proc_construct,
    orte_rmaps_base_proc_destruct);
/**
 * orte_rmaps_base_map_t — the map for one app context: its proc array
 * and the list of nodes those procs were placed on.
 */
static void orte_rmaps_base_map_construct(orte_rmaps_base_map_t* map)
{
    OBJ_CONSTRUCT(&map->nodes, opal_list_t);
    map->app = NULL;
    map->procs = NULL;
    map->num_procs = 0;
}

static void orte_rmaps_base_map_destruct(orte_rmaps_base_map_t* map)
{
    orte_std_cntr_t idx;
    opal_list_item_t* entry;

    /* release each proc, then the array that held them */
    for (idx = 0; idx < map->num_procs; idx++) {
        OBJ_RELEASE(map->procs[idx]);
    }
    if (map->procs != NULL) {
        free(map->procs);
        map->procs = NULL;
    }
    /* drain and release the node list */
    while (NULL != (entry = opal_list_remove_first(&map->nodes))) {
        OBJ_RELEASE(entry);
    }
    /* drop our reference on the app context */
    if (map->app != NULL) {
        OBJ_RELEASE(map->app);
        map->app = NULL;
    }
    OBJ_DESTRUCT(&map->nodes);
}

OBJ_CLASS_INSTANCE(
    orte_rmaps_base_map_t,
    opal_list_item_t,
    orte_rmaps_base_map_construct,
    orte_rmaps_base_map_destruct);
/*
 * qsort comparator for two GPR value entries describing mapped procs.
 * Sort order is ascending by (app context index, then rank), matching
 * the usual -1/0/+1 qsort convention.
 *
 * FIX: the original dereferenced sptr even when orte_dss.get failed,
 * reading an uninitialized pointer (UB). On decode failure we now log
 * the error and leave the field at its default of 0.
 */
static int orte_rmaps_value_compare(orte_gpr_value_t** val1, orte_gpr_value_t** val2)
{
    orte_std_cntr_t i;
    orte_std_cntr_t app1 = 0;
    orte_std_cntr_t app2 = 0;
    orte_std_cntr_t rank1 = 0;
    orte_std_cntr_t rank2 = 0;
    orte_std_cntr_t *sptr;
    orte_gpr_value_t* value;
    int rc;

    /* extract rank/app-context from the first entry */
    for (i = 0, value = *val1; i < value->cnt; i++) {
        orte_gpr_keyval_t* keyval = value->keyvals[i];
        if (strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
            if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
                ORTE_ERROR_LOG(rc);
            } else {
                /* sptr is only valid when get succeeded */
                rank1 = *sptr;
            }
            continue;
        }
        if (strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
            if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
                ORTE_ERROR_LOG(rc);
            } else {
                app1 = *sptr;
            }
            continue;
        }
    }

    /* and from the second entry */
    for (i = 0, value = *val2; i < value->cnt; i++) {
        orte_gpr_keyval_t* keyval = value->keyvals[i];
        if (strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
            if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
                ORTE_ERROR_LOG(rc);
            } else {
                rank2 = *sptr;
            }
            continue;
        }
        if (strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
            if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
                ORTE_ERROR_LOG(rc);
            } else {
                app2 = *sptr;
            }
            continue;
        }
    }

    /* app context index is the primary key, rank the secondary */
    if (app1 < app2) {
        return -1;
    }
    if (app1 > app2) {
        return +1;
    }
    if (rank1 < rank2) {
        return -1;
    }
    if (rank1 > rank2) {
        return +1;
    }
    return 0;
}
/**
 * Obtain the mapping for this job, and the list of nodes confined to
 * that mapping.
 *
 * Use this instead of orte_ras_base_node_query when past the RMAPS
 * framework, since components like the PLS are only concerned with the
 * nodes they have been mapped on, not all nodes allocated to their job.
 * If we are allocated 10 nodes from the RAS but only map onto 2 of
 * them, we don't try to launch orteds on all 10 nodes — just the 2.
 *
 * @param mapping_list (OUT) receives the job's orte_rmaps_base_map_t objects
 * @param nodes_alloc  (OUT) appended with each distinct mapped node; the
 *                     RAS node's reference count is bumped per entry
 * @param jobid        the job to query
 * @return ORTE_SUCCESS, or the error from the underlying map lookup
 */
int orte_rmaps_base_mapped_node_query(opal_list_t* mapping_list, opal_list_t* nodes_alloc, orte_jobid_t jobid)
{
    opal_list_item_t *item_a, *item_m, *item_n;
    int num_mapping = 0;
    int rc = ORTE_SUCCESS;
    bool matched = false;

    /* get the mapping for this job */
    rc = orte_rmaps_base_get_map(jobid, mapping_list);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    num_mapping = opal_list_get_size(mapping_list);

    /* Create a list of nodes that are in the mapping */
    for( item_m = opal_list_get_first(mapping_list);
         item_m != opal_list_get_end(mapping_list);
         item_m = opal_list_get_next(item_m)) {
        orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item_m;

        /* Iterate over all the nodes mapped and check them against the
         * allocated node list */
        for( item_n = opal_list_get_first(&(map->nodes));
             item_n != opal_list_get_end(&(map->nodes));
             item_n = opal_list_get_next(item_n)) {
            orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)item_n;
            matched = false;

            /* If this node is in the list already, skip it.
             * Duplicates can only occur when more than one map
             * contributed nodes — hence the num_mapping > 1 test. */
            if(num_mapping > 1) {
                for( item_a = opal_list_get_first(nodes_alloc);
                     item_a != opal_list_get_end(nodes_alloc);
                     item_a = opal_list_get_next(item_a)) {
                    orte_ras_node_t* ras_node = (orte_ras_node_t*)item_a;
                    /* pointer comparison suffices: both lists reference
                     * the same RAS node objects */
                    if( rmaps_node->node == ras_node) {
                        matched = true;
                        break;
                    }
                }
                if(matched) {
                    continue;
                }
            }

            /* Otherwise
             * - Add it to the allocated list of nodes
             */
            OBJ_RETAIN(rmaps_node->node);
            opal_list_append(nodes_alloc, &rmaps_node->node->super);
        }
    }

    return rc;
}
/**
 * Lookup a node by name in the map's node list; if absent, promote the
 * matching RAS node into a new map node and prepend it to the list.
 * In either case the proc is retained and appended to that node's proc
 * list. Returns NULL if the name matches no node in either list.
 */
static orte_rmaps_base_node_t*
orte_rmaps_lookup_node(opal_list_t* rmaps_nodes, opal_list_t* ras_nodes, char* node_name, orte_rmaps_base_proc_t* proc)
{
    opal_list_item_t* cursor;

    /* first, see whether the map already knows this node */
    for (cursor = opal_list_get_first(rmaps_nodes);
         cursor != opal_list_get_end(rmaps_nodes);
         cursor = opal_list_get_next(cursor)) {
        orte_rmaps_base_node_t* mapped = (orte_rmaps_base_node_t*)cursor;
        if (0 == strcmp(mapped->node->node_name, node_name)) {
            OBJ_RETAIN(proc);
            opal_list_append(&mapped->node_procs, &proc->super);
            return mapped;
        }
    }

    /* not mapped yet - find the RAS node and wrap it in a map node */
    for (cursor = opal_list_get_first(ras_nodes);
         cursor != opal_list_get_end(ras_nodes);
         cursor = opal_list_get_next(cursor)) {
        orte_ras_node_t* candidate = (orte_ras_node_t*)cursor;
        if (0 == strcmp(candidate->node_name, node_name)) {
            orte_rmaps_base_node_t* fresh = OBJ_NEW(orte_rmaps_base_node_t);
            OBJ_RETAIN(candidate);        /* map node holds a reference */
            fresh->node = candidate;
            OBJ_RETAIN(proc);
            opal_list_append(&fresh->node_procs, &proc->super);
            opal_list_prepend(rmaps_nodes, &fresh->super);
            return fresh;
        }
    }

    /* name not found in either list */
    return NULL;
}
/**
 * Query the process mapping for a job from the registry and rebuild it
 * as a list of orte_rmaps_base_map_t — one per app context, each
 * holding its procs and the nodes they were placed on.
 *
 * @param jobid         the job whose map is to be reconstructed
 * @param mapping_list  (OUT) caller-constructed list; one map object
 *                      is appended per app context on success
 * @return ORTE_SUCCESS or an ORTE error code
 *
 * Ownership: the app contexts returned by orte_rmgr.get_app_context are
 * stored directly in the maps (no extra retain here), so releasing the
 * maps releases them; only the holding arrays are freed in this function.
 */
int orte_rmaps_base_get_map(orte_jobid_t jobid, opal_list_t* mapping_list)
{
    orte_app_context_t** app_context = NULL;
    orte_rmaps_base_map_t** mapping = NULL;
    opal_list_t nodes;               /* RAS nodes allocated to this job */
    opal_list_item_t* item;
    orte_std_cntr_t i, num_context = 0;
    orte_std_cntr_t *sptr;
    orte_process_name_t *pptr;
    pid_t *pidptr;
    char* segment = NULL;
    orte_gpr_value_t** values;
    orte_std_cntr_t v, num_values;
    int rc;
    /* registry keys requested for every process container */
    char* keys[] = {
        ORTE_PROC_RANK_KEY,
        ORTE_PROC_NAME_KEY,
        ORTE_PROC_APP_CONTEXT_KEY,
        ORTE_PROC_PID_KEY,
        ORTE_PROC_LOCAL_PID_KEY,
        ORTE_NODE_NAME_KEY,
        NULL
    };

    /* query the application context */
    if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &app_context, &num_context))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* query the node list */
    OBJ_CONSTRUCT(&nodes, opal_list_t);
    if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(&nodes,jobid))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* build the mapping: one map object per app context.
     * NOTE(review): mapping[] is malloc'd uninitialized and filled by
     * the loop below; an early "goto cleanup" from inside that loop
     * leaves the tail slots uninitialized, and cleanup then reads them
     * — looks like a latent uninitialized-pointer release; verify. */
    if(NULL == (mapping = (orte_rmaps_base_map_t**)malloc(sizeof(orte_rmaps_base_map_t*) * num_context))) {
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    for(i=0; i<num_context; i++) {
        orte_rmaps_base_map_t* map = OBJ_NEW(orte_rmaps_base_map_t);
        orte_app_context_t* app = app_context[i];
        map->app = app;
        if (0 < app->num_procs) {
            map->procs = (orte_rmaps_base_proc_t**)malloc(sizeof(orte_rmaps_base_proc_t*) * app->num_procs);
            if(NULL == map->procs) {
                OBJ_RELEASE(map);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
        } else {
            if (1 < num_context) { /** can't have multiple contexts if zero num_procs */
                ORTE_ERROR_LOG(ORTE_ERR_INVALID_NUM_PROCS);
                rc = ORTE_ERR_INVALID_NUM_PROCS;
                goto cleanup;
            }
        }
        map->num_procs = 0;
        mapping[i] = map;
    }

    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* query the process list from the registry */
    rc = orte_gpr.get(
        ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
        segment,
        NULL,
        keys,
        &num_values,
        &values);
    if(ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* sort the response into (app context, rank) order */
    qsort(values, num_values, sizeof(orte_gpr_value_t*),
        (int (*)(const void*,const void*))orte_rmaps_value_compare);

    /* build the proc list; each registry value is one process */
    for(v=0; v<num_values; v++) {
        orte_gpr_value_t* value = values[v];
        orte_rmaps_base_map_t* map = NULL;
        orte_rmaps_base_proc_t* proc;
        char* node_name = NULL;
        orte_std_cntr_t kv, app_index;

        proc = OBJ_NEW(orte_rmaps_base_proc_t);
        if(NULL == proc) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        /* decode each keyval into the proc object */
        for(kv = 0; kv<value->cnt; kv++) {
            orte_gpr_keyval_t* keyval = value->keyvals[kv];
            if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                proc->proc_rank = *sptr;
                continue;
            }
            if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                proc->proc_name = *pptr;
                continue;
            }
            if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                app_index = *sptr;
                if(app_index >= num_context) {
                    rc = ORTE_ERR_BAD_PARAM;
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                map = mapping[app_index];
                proc->app = strdup(app_context[app_index]->app);
                continue;
            }
            if (strcmp(keyval->key, ORTE_PROC_PID_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                proc->pid = *pidptr;
                continue;
            }
            if (strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                proc->local_pid = *pidptr;
                continue;
            }
            if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
                /* use the dss.copy function here to protect us against zero-length strings */
                if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                continue;
            }
        }
        /* global record (no app context key present) - not a process entry */
        if(NULL == map) {
            OBJ_RELEASE(proc);
            continue;
        }
        /*
         * This seems like a dummy check, but it ensures that we fail
         * rather than overrun our array. This can happen if the
         * indicies on the app schemas are incorrect
         */
        if(map->num_procs < map->app->num_procs) {
            map->procs[map->num_procs++] = proc;
            proc->proc_node = orte_rmaps_lookup_node(&map->nodes, &nodes, node_name, proc);
        }
        else {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            goto cleanup;
        }
        /* NOTE(review): node_name was dss.copy'd above but is never
         * freed here or inside lookup_node — apparent per-proc leak;
         * likewise the values[] array is never released on the success
         * path. Verify against the GPR ownership rules. */
    }

    /* cleanup any nodes allocated and not mapped */
    while(NULL != (item = opal_list_remove_first(&nodes))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&nodes);

    /* hand the completed maps to the caller */
    for(i=0; i<num_context; i++) {
        opal_list_append(mapping_list, &mapping[i]->super);
    }
    free(segment);
    free(app_context);
    free(mapping);
    return ORTE_SUCCESS;

cleanup:
    if(NULL != segment)
        free(segment);
    if(NULL != app_context) {
        for(i=0; i<num_context; i++) {
            OBJ_RELEASE(app_context[i]);
        }
        free(app_context);
    }
    if(NULL != mapping) {
        for(i=0; i<num_context; i++) {
            if(NULL != mapping[i])
                OBJ_RELEASE(mapping[i]);
        }
        free(mapping);
    }
    /* cleanup any nodes allocated and not mapped */
    while(NULL != (item = opal_list_remove_first(&nodes))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&nodes);
    return rc;
}
/**
 * Query the process mapping for one specified node from the registry.
 *
 * @param cellid        cell of the node
 * @param jobid         job whose map is queried
 * @param hostname      node name used as the registry filter
 * @param mapping_list  (OUT) appended with the maps (one per app context)
 *                      that have procs on this node
 * @return ORTE_SUCCESS or an ORTE error code
 *
 * FIXES vs. the original:
 *  - rc from orte_gpr.get_conditional was ignored; on failure we then
 *    qsort'd an invalid values/num_values pair (UB).
 *  - `condition` was read uninitialized by cleanup when an early error
 *    path jumped there before it was created.
 *  - the OOM path for a new map node left rc at ORTE_SUCCESS, so the
 *    function reported success on failure.
 *  - ras_node, the values array, and the per-value node_name copies
 *    leaked on various paths.
 */
int orte_rmaps_base_get_node_map(
    orte_cellid_t cellid,
    orte_jobid_t jobid,
    const char* hostname,
    opal_list_t* mapping_list)
{
    orte_app_context_t** app_context = NULL;
    orte_rmaps_base_map_t** mapping = NULL;
    orte_ras_node_t *ras_node = NULL;
    orte_gpr_keyval_t *condition = NULL;   /* must be NULL for cleanup */
    orte_std_cntr_t i, num_context = 0;
    orte_std_cntr_t *sptr;
    pid_t *pidptr;
    orte_process_name_t *pptr;
    char* segment = NULL;
    orte_gpr_value_t** values = NULL;
    orte_std_cntr_t v, num_values = 0;
    int rc;
    char* keys[] = {
        ORTE_PROC_RANK_KEY,
        ORTE_PROC_NAME_KEY,
        ORTE_PROC_APP_CONTEXT_KEY,
        ORTE_PROC_PID_KEY,
        ORTE_PROC_LOCAL_PID_KEY,
        ORTE_NODE_NAME_KEY,
        NULL
    };

    /* allocate the node */
    if(NULL == (ras_node = orte_ras.node_lookup(cellid,hostname))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    /* query the application context */
    if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &app_context, &num_context))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;    /* releases ras_node instead of leaking it */
    }

    if(NULL == (mapping = (orte_rmaps_base_map_t**)malloc(sizeof(orte_rmaps_base_map_t*) * num_context))) {
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    for(i=0; i<num_context; i++) {
        orte_rmaps_base_map_t* map = OBJ_NEW(orte_rmaps_base_map_t);
        orte_app_context_t* app = app_context[i];
        OBJ_RETAIN(app);   /* map holds its own reference here */
        map->app = app;
        map->procs = (orte_rmaps_base_proc_t**)malloc(sizeof(orte_rmaps_base_proc_t*) * app->num_procs);
        if(NULL == map->procs) {
            OBJ_RELEASE(map);
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        map->num_procs = 0;
        mapping[i] = map;
    }

    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* setup condition/filter for query - return only processes that
     * are assigned to the specified node name
     */
    if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&condition, ORTE_NODE_NAME_KEY, ORTE_STRING, (void*)hostname))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* query the process list from the registry */
    rc = orte_gpr.get_conditional(
        ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
        segment,
        NULL,
        keys,
        1,
        &condition,
        &num_values,
        &values);
    /* FIX: check the return code - values/num_values are not valid on error */
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* sort the response into (app context, rank) order */
    qsort(values, num_values, sizeof(orte_gpr_value_t*),
        (int (*)(const void*,const void*))orte_rmaps_value_compare);

    /* build the proc list */
    for(v=0; v<num_values; v++) {
        orte_gpr_value_t* value = values[v];
        orte_rmaps_base_map_t* map = NULL;
        orte_rmaps_base_node_t *node = NULL;
        orte_rmaps_base_proc_t* proc;
        char* node_name = NULL;
        orte_std_cntr_t kv, app_index;

        proc = OBJ_NEW(orte_rmaps_base_proc_t);
        if(NULL == proc) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        for(kv = 0; kv<value->cnt; kv++) {
            orte_gpr_keyval_t* keyval = value->keyvals[kv];
            if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                proc->proc_rank = *sptr;
                continue;
            }
            if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                proc->proc_name = *pptr;
                continue;
            }
            if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                app_index = *sptr;
                if(app_index >= num_context) {
                    rc = ORTE_ERR_BAD_PARAM;
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                map = mapping[app_index];
                /* all procs returned are on this one node, so each map
                 * keeps (at most) a single node entry */
                if(opal_list_get_size(&map->nodes) == 0) {
                    node = OBJ_NEW(orte_rmaps_base_node_t);
                    if(NULL == node) {
                        /* FIX: set rc so we don't return SUCCESS on OOM */
                        rc = ORTE_ERR_OUT_OF_RESOURCE;
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    OBJ_RETAIN(ras_node);
                    node->node = ras_node;
                    opal_list_append(&map->nodes, &node->super);
                } else {
                    node = (orte_rmaps_base_node_t*)opal_list_get_first(&map->nodes);
                }
                proc->app = strdup(app_context[app_index]->app);
                continue;
            }
            if (strcmp(keyval->key, ORTE_PROC_PID_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                proc->pid = *pidptr;
                continue;
            }
            if (strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                proc->local_pid = *pidptr;
                continue;
            }
            if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
                /* use the dss.copy function here to protect us against zero-length strings */
                if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                continue;
            }
        }
        /* skip this entry? (global record, or wrong cell) */
        if(NULL == map ||
           proc->proc_name.cellid != cellid) {
            OBJ_RELEASE(proc);
            if (NULL != node_name) free(node_name);   /* FIX: was leaked */
            continue;
        }
        map->procs[map->num_procs++] = proc;
        OBJ_RETAIN(proc);
        opal_list_append(&node->node_procs, &proc->super);
        proc->proc_node = node;
        if (NULL != node_name) free(node_name);       /* FIX: was leaked */
    }

    /* return mapping for the entries that have procs on this node */
    for(i=0; i<num_context; i++) {
        orte_rmaps_base_map_t* map = mapping[i];
        if(map->num_procs) {
            opal_list_append(mapping_list, &map->super);
        } else {
            OBJ_RELEASE(map);
        }
    }

    /* decrement reference count on node */
    OBJ_RELEASE(ras_node);
    /* release all app context - note the reference count was bumped
     * if saved in the map
     */
    for(i=0; i<num_context; i++) {
        OBJ_RELEASE(app_context[i]);
    }
    /* FIX: release the registry response (was leaked) */
    for(v=0; v<num_values; v++) {
        OBJ_RELEASE(values[v]);
    }
    if (NULL != values) free(values);
    free(segment);
    free(app_context);
    free(mapping);
    OBJ_RELEASE(condition);
    return ORTE_SUCCESS;

cleanup:
    if(NULL != segment)
        free(segment);
    if(NULL != app_context) {
        for(i=0; i<num_context; i++) {
            OBJ_RELEASE(app_context[i]);
        }
        free(app_context);
    }
    if(NULL != mapping) {
        for(i=0; i<num_context; i++) {
            if(NULL != mapping[i])
                OBJ_RELEASE(mapping[i]);
        }
        free(mapping);
    }
    for(v=0; v<num_values; v++) {
        OBJ_RELEASE(values[v]);
    }
    if (NULL != values) free(values);
    if (NULL != ras_node)
        OBJ_RELEASE(ras_node);    /* FIX: was leaked on error paths */
    if (NULL != condition)
        OBJ_RELEASE(condition);
    return rc;
}
/**
 * Set the process mapping in the registry.
 *
 * Builds one GPR value per mapped process plus one extra value that
 * seeds the INIT-state counter on the job's globals container (so the
 * INIT trigger can fire), then commits everything in a single put.
 *
 * @param jobid         job whose map is being stored (used via segment name)
 * @param mapping_list  list of orte_rmaps_base_map_t describing the map
 * @return ORTE_SUCCESS or an ORTE error code
 *
 * FIXES vs. the original:
 *  - values[] slots are NULL-initialized so the cleanup path never
 *    releases an uninitialized pointer (previously UB when an early
 *    error jumped to cleanup before the per-proc values were created).
 *  - cleanup now also releases the INIT-counter slot values[num_procs]
 *    and frees `segment`, both of which previously leaked.
 */
int orte_rmaps_base_set_map(orte_jobid_t jobid, opal_list_t* mapping_list)
{
    orte_std_cntr_t i;
    orte_std_cntr_t index=0;
    orte_std_cntr_t num_procs = 0;
    int rc = ORTE_SUCCESS;
    opal_list_item_t* item;
    orte_gpr_value_t** values;
    char *segment;

    /* total number of procs across all app contexts */
    for(item = opal_list_get_first(mapping_list);
        item != opal_list_get_end(mapping_list);
        item = opal_list_get_next(item)) {
        orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item;
        num_procs += map->num_procs;
    }
    if(num_procs == 0) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /**
     * allocate value array. We need to reserve one extra spot so we can set the counter
     * for the process INIT state to indicate that all procs are at that state. This will
     * allow the INIT trigger to fire.
     */
    values = (orte_gpr_value_t**)malloc((1+num_procs) * sizeof(orte_gpr_value_t*));
    if(NULL == values) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    /* NULL every slot so cleanup is always safe */
    for (i=0; i < 1+num_procs; i++) {
        values[i] = NULL;
    }

    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment,jobid))) {
        ORTE_ERROR_LOG(rc);
        free(values);
        return rc;
    }

    /** setup the last value in the array to update the INIT counter */
    if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[num_procs]),
                                                    ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
                                                    segment, 1, 1))) {
        ORTE_ERROR_LOG(rc);
        free(values);
        free(segment);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[num_procs]->keyvals[0]), ORTE_PROC_NUM_AT_INIT, ORTE_STD_CNTR, &num_procs))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    values[num_procs]->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* counter is in the job's globals container */

    /* create one value (7 keyvals, proc-specific tokens) per process */
    for(i=0; i<num_procs; i++) {
        if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
                                                        ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
                                                        segment, 7, 0))) {
            ORTE_ERROR_LOG(rc);
            /* common cleanup also releases values[num_procs], which the
             * old inline unwind here leaked */
            goto cleanup;
        }
    }

    /* iterate through all processes and initialize value array */
    for(item = opal_list_get_first(mapping_list);
        item != opal_list_get_end(mapping_list);
        item = opal_list_get_next(item)) {
        orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item;
        orte_std_cntr_t p;
        for(p=0; p<map->num_procs; p++) {
            orte_rmaps_base_proc_t* proc = map->procs[p];
            orte_gpr_value_t* value = values[index++];
            orte_proc_state_t proc_state=ORTE_PROC_STATE_INIT;

            /* initialize keyvals */
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_PROC_RANK_KEY, ORTE_STD_CNTR, &(proc->proc_rank)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_PROC_NAME_KEY, ORTE_NAME, &(proc->proc_name)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_NODE_NAME_KEY, ORTE_STRING, proc->proc_node->node->node_name))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &(map->app->idx)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[5]), ORTE_PROC_PID_KEY, ORTE_PID, &(proc->pid)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[6]), ORTE_PROC_LOCAL_PID_KEY, ORTE_PID, &(proc->local_pid)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            /* set the tokens */
            if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(value->tokens), &(value->num_tokens), &(proc->proc_name)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
        }
    }

    /* insert all values in one call */
    if (ORTE_SUCCESS != (rc = orte_gpr.put((1+num_procs), values))) {
        ORTE_ERROR_LOG(rc);
    }

cleanup:
    /* release every created value, including the INIT-counter slot */
    for(i=0; i < 1+num_procs; i++) {
        if(NULL != values[i]) {
            OBJ_RELEASE(values[i]);
        }
    }
    free(values);
    free(segment);   /* was leaked on success and late-error paths */
    return rc;
}

Просмотреть файл

@ -42,7 +42,7 @@ static orte_rmaps_base_module_t *select_any(void);
* Function for selecting one component from all those that are
* available.
*/
int orte_rmaps_base_map(orte_jobid_t job, char *desired_mapper)
int orte_rmaps_base_map_job(orte_jobid_t job, char *desired_mapper)
{
orte_rmaps_base_module_t *module=NULL;
int rc;

Просмотреть файл

@ -1,35 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/class/opal_list.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
/* No-op mapper stub: mapping is not supported in this configuration. */
int orte_rmaps_base_map_no_op(orte_jobid_t job, char *desired_mapper)
{
    /* silence unused-parameter warnings */
    (void)job;
    (void)desired_mapper;
    return ORTE_ERR_NOT_SUPPORTED;
}

Просмотреть файл

@ -50,14 +50,17 @@ orte_rmaps_base_t orte_rmaps_base;
* Declare the RMAPS module to hold the API function pointers
*/
orte_rmaps_base_module_t orte_rmaps = {
orte_rmaps_base_map,
orte_rmaps_base_map_job,
orte_rmaps_base_get_job_map,
orte_rmaps_base_get_node_map,
orte_rmaps_base_finalize
};
orte_rmaps_base_module_t orte_rmaps_no_op = {
orte_rmaps_base_map_no_op,
orte_rmaps_base_finalize
};
/*
* Include all the RMAPS class instance declarations
*/
#include "orte/mca/rmaps/base/rmaps_class_instances.h"
/**
* Function for finding and opening either all MCA components, or the one
@ -66,7 +69,7 @@ orte_rmaps_base_module_t orte_rmaps_no_op = {
int orte_rmaps_base_open(void)
{
int param, rc, value;
char *policy, *requested;
char *policy;
orte_data_type_t tmp;
/* Debugging / verbose output */
@ -150,30 +153,7 @@ int orte_rmaps_base_open(void)
}
/* Some systems do not want any RMAPS support. In those cases,
* memory consumption is also an issue. For those systems, we
* avoid opening the RMAPS components by checking for a directive
* to use the "null" component.
*/
param = mca_base_param_reg_string_name("rmaps", NULL, NULL,
false, false, NULL, NULL);
if (ORTE_ERROR == mca_base_param_lookup_string(param, &requested)) {
return ORTE_ERROR;
}
if (NULL != requested && 0 == strcmp(requested, "null")) {
/* the user has specifically requested that we use the "null"
* component. In this case, that means we do NOT open any
* components, and we simply use the default module we have
* already defined above
*/
orte_rmaps_base.no_op_selected = true;
orte_rmaps = orte_rmaps_no_op; /* use the no_op module */
return ORTE_SUCCESS;
}
orte_rmaps_base.no_op_selected = false;
/* Open up all the components that we can find */
if (ORTE_SUCCESS !=
mca_base_components_open("rmaps", orte_rmaps_base.rmaps_output,
mca_rmaps_base_static_components,

413
orte/mca/rmaps/base/rmaps_base_registry_fns.c Обычный файл
Просмотреть файл

@ -0,0 +1,413 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/util/output.h"
#include "opal/util/trace.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/schema/schema.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/smr/smr_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"
/*
 * Query the process mapping for a job from the registry and rebuild it
 * as an orte_job_map_t (node list, each node carrying its procs).
 *
 * @param map    (OUT) the reconstructed map; NULL on failure
 * @param jobid  job to query
 * @return ORTE_SUCCESS or an ORTE error code
 *
 * FIXES vs. the original:
 *  - username/oversub/cell were passed to add_proc_to_map uninitialized
 *    whenever a record lacked the corresponding registry key; they (and
 *    node_name) are now reset at the top of every iteration.
 *  - the get_app_context failure path leaked both `mapping` and `segment`.
 *  - `username` copies and the registry `values` array leaked on success.
 */
int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
{
    orte_job_map_t *mapping;
    orte_mapped_proc_t *proc;
    orte_cellid_t *cellptr, cell;
    orte_std_cntr_t *sptr;
    bool *bptr, oversub;
    pid_t *pidptr;
    orte_process_name_t *pptr;
    char *segment;
    char *node_name = NULL;
    char *username = NULL;
    orte_gpr_value_t **values, *value;
    orte_gpr_keyval_t* keyval;
    orte_std_cntr_t v, kv, num_values;
    int rc;
    char* keys[] = {
        ORTE_PROC_RANK_KEY,
        ORTE_PROC_NAME_KEY,
        ORTE_PROC_APP_CONTEXT_KEY,
        ORTE_PROC_LOCAL_PID_KEY,
        ORTE_CELLID_KEY,
        ORTE_NODE_NAME_KEY,
        ORTE_NODE_USERNAME_KEY,
        ORTE_NODE_OVERSUBSCRIBED_KEY,
        NULL
    };

    OPAL_TRACE(1);

    /* define default answer */
    *map = NULL;

    /* create the object */
    mapping = OBJ_NEW(orte_job_map_t);
    if (NULL == mapping) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* store the jobid */
    mapping->job = jobid;

    /* get the job segment name */
    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(mapping);
        return rc;
    }

    /* query the application context */
    if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &(mapping->apps), &(mapping->num_apps)))) {
        ORTE_ERROR_LOG(rc);
        /* FIX: this path used to leak both the mapping object and segment */
        OBJ_RELEASE(mapping);
        free(segment);
        return rc;
    }

    /* query the process list from the registry */
    rc = orte_gpr.get(
        ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
        segment,
        NULL,
        keys,
        &num_values,
        &values);
    if(ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(mapping);
        free(segment);
        return rc;
    }
    free(segment);

    /* build the node and proc lists. each value corresponds
     * to a process in the map
     */
    for(v=0; v<num_values; v++) {
        value = values[v];
        /* FIX: reset per-record node attributes each iteration so a
         * record missing one of the keys neither inherits data from a
         * previous record nor passes uninitialized values below */
        node_name = NULL;
        username = NULL;
        oversub = false;
        cell = 0;   /* default cell; overwritten when ORTE_CELLID_KEY is present */

        proc = OBJ_NEW(orte_mapped_proc_t);
        if(NULL == proc) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        for(kv = 0; kv<value->cnt; kv++) {
            keyval = value->keyvals[kv];
            if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                proc->rank = *sptr;
                continue;
            }
            if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                proc->name = *pptr;
                continue;
            }
            if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                proc->app_idx = *sptr;
                continue;
            }
            if(strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                proc->pid = *pidptr;
                continue;
            }
            if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cellptr, keyval->value, ORTE_CELLID))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                cell = *cellptr;
                continue;
            }
            if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
                /* use the dss.copy function here to protect us against zero-length strings */
                if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                continue;
            }
            if(strcmp(keyval->key, ORTE_NODE_USERNAME_KEY) == 0) {
                /* use the dss.copy function here to protect us against zero-length strings */
                if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&username, keyval->value->data, ORTE_STRING))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                continue;
            }
            if(strcmp(keyval->key, ORTE_NODE_OVERSUBSCRIBED_KEY) == 0) {
                if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, keyval->value, ORTE_BOOL))) {
                    ORTE_ERROR_LOG(rc);
                    goto cleanup;
                }
                oversub = *bptr;
                continue;
            }
        }
        /* store this process in the map */
        if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(mapping, cell, node_name, username, oversub, proc))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        if (NULL != node_name) free(node_name);
        node_name = NULL;
        /* FIX: username is also a copy we own - free it (it leaked before).
         * Assumes add_proc_to_map copies username just as it does
         * node_name - verify against its implementation. */
        if (NULL != username) free(username);
        username = NULL;
    }

    /* FIX: release the registry response on success too (was leaked) */
    for (v=0; v < num_values; v++) {
        OBJ_RELEASE(values[v]);
    }
    if (NULL != values) free(values);

    /* all done */
    *map = mapping;
    return ORTE_SUCCESS;

cleanup:
    OBJ_RELEASE(mapping);
    if (NULL != node_name) free(node_name);
    if (NULL != username) free(username);
    for (v=0; v < num_values; v++) {
        OBJ_RELEASE(values[v]);
    }
    if (NULL != values) free(values);
    return rc;
}
/*
 * Retrieve the mapped-node entry for a specific node within a job's map.
 *
 * Pulls the full job map from the registry, scans it for the node that
 * matches both the cell and the nodename, and hands that node object back
 * to the caller.  The matching node is detached from the map's node list
 * before the map is released so the caller owns it afterwards.
 *
 * @param node     [OUT] the matching orte_mapped_node_t, or NULL if not found
 * @param cell     cell id the node must belong to
 * @param nodename name the node must carry (compared with strcmp)
 * @param job      jobid whose map is searched
 * @return ORTE_SUCCESS, ORTE_ERR_NOT_FOUND, or the error from get_job_map
 */
int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell,
                                 char *nodename, orte_jobid_t job)
{
    orte_job_map_t *map;
    opal_list_item_t *item;
    orte_mapped_node_t *candidate;
    int rc;

    /* default answer in case nothing matches */
    *node = NULL;

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_job_map(&map, job))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* walk the map looking for the requested node; assume failure until
     * a match is found so we can use a single exit point */
    rc = ORTE_ERR_NOT_FOUND;
    for (item = opal_list_get_first(&map->nodes);
         item != opal_list_get_end(&map->nodes);
         item = opal_list_get_next(item)) {
        candidate = (orte_mapped_node_t*)item;
        if (cell == candidate->cell &&
            0 == strcmp(nodename, candidate->nodename)) {
            /* detach the node so releasing the map below does not
             * destroy the object we are returning */
            opal_list_remove_item(&map->nodes, item);
            *node = candidate;
            rc = ORTE_SUCCESS;
            break;
        }
    }

    OBJ_RELEASE(map);
    return rc;
}
/**
 * Set the process mapping in the registry.
 *
 * One registry value is created per mapped process on the job's segment,
 * carrying the rank, name, cellid, nodename, username, oversubscribed
 * flag, app-context index, and initial (INIT) state.  One additional
 * value sets the ORTE_PROC_NUM_AT_INIT counter in the job's globals
 * container to the total process count so the INIT trigger can fire.
 * Everything is committed with a single orte_gpr.put call.
 *
 * Leak fixes: the INIT-counter value (values[num_procs]) and the segment
 * name string were previously leaked on the success path (the cleanup
 * loop stopped at num_procs and segment was only freed on two early
 * error paths).  All exits now funnel through one cleanup that releases
 * every created value and frees both allocations; the value array is
 * zeroed on allocation so cleanup is safe after a partial failure.
 *
 * @param map  fully populated job map (must contain at least one proc)
 * @return ORTE_SUCCESS or an ORTE error code
 */
int orte_rmaps_base_put_job_map(orte_job_map_t *map)
{
    orte_std_cntr_t i;
    orte_std_cntr_t index = 0;
    orte_std_cntr_t num_procs = 0;
    int rc = ORTE_SUCCESS;
    opal_list_item_t *item, *item2;
    orte_gpr_value_t **values = NULL;
    orte_gpr_value_t *value;
    char *segment = NULL;
    orte_mapped_node_t *node;
    orte_mapped_proc_t *proc;
    orte_proc_state_t proc_state = ORTE_PROC_STATE_INIT;

    OPAL_TRACE(2);

    /* total up the procs across all nodes in the map */
    for (item = opal_list_get_first(&map->nodes);
         item != opal_list_get_end(&map->nodes);
         item = opal_list_get_next(item)) {
        node = (orte_mapped_node_t*)item;
        num_procs += opal_list_get_size(&node->procs);
    }
    if (0 == num_procs) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /**
     * Allocate the value array with one extra spot for the INIT-state
     * counter.  calloc zeroes the slots so the cleanup path can release
     * exactly the entries that were actually created.
     */
    values = (orte_gpr_value_t**)calloc(1 + num_procs, sizeof(orte_gpr_value_t*));
    if (NULL == values) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, map->job))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /** setup the last value in the array to update the INIT counter */
    if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[num_procs]),
                                                    ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
                                                    segment, 1, 1))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[num_procs]->keyvals[0]),
                                                     ORTE_PROC_NUM_AT_INIT,
                                                     ORTE_STD_CNTR, &num_procs))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    /* counter lives in the job's globals container */
    values[num_procs]->tokens[0] = strdup(ORTE_JOB_GLOBALS);

    /* one value per process: 8 keyvals, tokens filled in below */
    for (i = 0; i < num_procs; i++) {
        if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
                                                        ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
                                                        segment, 8, 0))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    }

    /* iterate through all processes and initialize the value array */
    for (item = opal_list_get_first(&map->nodes);
         item != opal_list_get_end(&map->nodes);
         item = opal_list_get_next(item)) {
        node = (orte_mapped_node_t*)item;

        for (item2 = opal_list_get_first(&node->procs);
             item2 != opal_list_get_end(&node->procs);
             item2 = opal_list_get_next(item2)) {
            proc = (orte_mapped_proc_t*)item2;
            value = values[index++];

            /* initialize keyvals */
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_PROC_RANK_KEY, ORTE_STD_CNTR, &(proc->rank)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_PROC_NAME_KEY, ORTE_NAME, &(proc->name)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_CELLID_KEY, ORTE_CELLID, &(node->cell)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), ORTE_NODE_NAME_KEY, ORTE_STRING, node->nodename))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), ORTE_NODE_USERNAME_KEY, ORTE_STRING, node->username))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[5]), ORTE_NODE_OVERSUBSCRIBED_KEY, ORTE_BOOL, &(node->oversubscribed)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[6]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &(proc->app_idx)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[7]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }

            /* set the tokens */
            if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(value->tokens),
                                                                  &(value->num_tokens),
                                                                  &(proc->name)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
        }
    }

    /* insert all values in one call */
    if (ORTE_SUCCESS != (rc = orte_gpr.put((1 + num_procs), values))) {
        ORTE_ERROR_LOG(rc);
    }

cleanup:
    /* release every value that was created, including the INIT counter
     * in slot num_procs (previously leaked) */
    for (i = 0; i < 1 + num_procs; i++) {
        if (NULL != values[i]) {
            OBJ_RELEASE(values[i]);
        }
    }
    free(values);
    if (NULL != segment) {
        free(segment);
    }
    return rc;
}

Просмотреть файл

@ -48,6 +48,7 @@ static bool are_all_mapped_valid(char **mapping,
opal_list_t* nodes)
{
opal_list_item_t *item;
orte_ras_node_t *node;
int i;
bool matched;
@ -57,7 +58,8 @@ static bool are_all_mapped_valid(char **mapping,
for(item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes);
item = opal_list_get_next(item) ) {
if( 0 == strcmp( ((orte_ras_node_t*) item)->node_name, mapping[i]) ) {
node = (orte_ras_node_t*) item;
if( 0 == strcmp(node->node_name, mapping[i]) ) {
matched = true;
break;
}
@ -94,7 +96,7 @@ static bool is_mapped(opal_list_item_t *item,
/*
* Query the registry for all nodes allocated to a specified job
*/
int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots)
int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots)
{
opal_list_item_t *item, *next;
orte_ras_node_t *node;
@ -104,7 +106,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
/** set default answer */
*total_num_slots = 0;
if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(nodes, jobid))) {
/* get the allocation for this job */
if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(allocated_nodes, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -115,21 +118,21 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
id = mca_base_param_find("rmaps", NULL, "base_schedule_local");
mca_base_param_lookup_int(id, &nolocal);
if (0 == nolocal) {
for (item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes);
for (item = opal_list_get_first(allocated_nodes);
item != opal_list_get_end(allocated_nodes);
item = opal_list_get_next(item) ) {
if (0 == strcmp(((orte_ras_node_t *) item)->node_name,
orte_system_info.nodename) ||
opal_ifislocal(((orte_ras_node_t *) item)->node_name)) {
opal_list_remove_item(nodes, item);
node = (orte_ras_node_t*)item;
if (0 == strcmp(node->node_name, orte_system_info.nodename) ||
opal_ifislocal(node->node_name)) {
opal_list_remove_item(allocated_nodes, item);
break;
}
}
}
/** remove all nodes that are already at max usage */
item = opal_list_get_first(nodes);
while (item != opal_list_get_end(nodes)) {
item = opal_list_get_first(allocated_nodes);
while (item != opal_list_get_end(allocated_nodes)) {
/** save the next pointer in case we remove this node */
next = opal_list_get_next(item);
@ -137,8 +140,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
/** check to see if this node is fully used - remove if so */
node = (orte_ras_node_t*)item;
if (0 != node->node_slots_max && node->node_slots_inuse > node->node_slots_max) {
opal_list_remove_item(nodes, item);
} else { /** otherwise, add its slots to the total */
opal_list_remove_item(allocated_nodes, item);
} else { /** otherwise, add the slots for our job to the total */
num_slots += node->node_slots;
}
@ -146,8 +149,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
item = next;
}
/* Sanity check to make sure we have been allocated nodes */
if (0 == opal_list_get_size(nodes)) {
/* Sanity check to make sure we have resources available */
if (0 == opal_list_get_size(allocated_nodes)) {
ORTE_ERROR_LOG(ORTE_ERR_TEMP_OUT_OF_RESOURCE);
return ORTE_ERR_TEMP_OUT_OF_RESOURCE;
}
@ -245,67 +248,108 @@ int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
}
/*
* Claim a slot for a specified job on a node
*/
int orte_rmaps_base_claim_slot(orte_rmaps_base_map_t *map,
orte_ras_node_t *current_node,
orte_jobid_t jobid, orte_vpid_t vpid,
int proc_index,
opal_list_t *nodes,
opal_list_t *fully_used_nodes)
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename,
char *username, bool oversubscribed, orte_mapped_proc_t *proc)
{
orte_rmaps_base_proc_t *proc;
orte_process_name_t *proc_name;
orte_rmaps_base_node_t *rmaps_node;
int rc;
opal_list_item_t *item;
orte_mapped_node_t *node;
/* create objects */
rmaps_node = OBJ_NEW(orte_rmaps_base_node_t);
if (NULL == rmaps_node) {
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
node = (orte_mapped_node_t*)item;
if (cell == node->cell && 0 == strcmp(nodename, node->nodename)) {
/* node was found - add this proc to that list */
opal_list_append(&node->procs, &proc->super);
/* set the oversubscribed flag */
node->oversubscribed = oversubscribed;
return ORTE_SUCCESS;
}
}
/* node was NOT found - add this one to the list */
node = OBJ_NEW(orte_mapped_node_t);
if (NULL == node) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
OBJ_RETAIN(current_node);
rmaps_node->node = current_node;
proc = OBJ_NEW(orte_rmaps_base_proc_t);
node->cell = cell;
node->nodename = strdup(nodename);
if (NULL != username) {
node->username = strdup(username);
}
node->oversubscribed = oversubscribed;
opal_list_append(&map->nodes, &node->super);
/* and add this proc to the new node's list of procs */
opal_list_append(&node->procs, &proc->super);
return ORTE_SUCCESS;
}
/*
* Claim a slot for a specified job on a node
*/
int orte_rmaps_base_claim_slot(orte_job_map_t *map,
orte_ras_node_t *current_node,
orte_jobid_t jobid, orte_vpid_t vpid,
orte_std_cntr_t app_idx,
opal_list_t *nodes,
opal_list_t *fully_used_nodes)
{
orte_process_name_t *name;
orte_mapped_proc_t *proc;
bool oversub;
int rc;
/* create mapped_proc object */
proc = OBJ_NEW(orte_mapped_proc_t);
if (NULL == proc) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(rmaps_node);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* create the process name as an offset from the vpid-start */
rc = orte_ns.create_process_name(&proc_name, current_node->node_cellid,
rc = orte_ns.create_process_name(&name, current_node->node_cellid,
jobid, vpid);
if (rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(proc);
OBJ_RELEASE(rmaps_node);
return rc;
}
proc->proc_node = rmaps_node;
proc->proc_name = *proc_name;
proc->proc_rank = vpid;
orte_ns.free_name(&proc_name);
OBJ_RETAIN(proc); /* bump reference count for the node */
opal_list_append(&rmaps_node->node_procs, &proc->super);
map->procs[proc_index] = proc;
/* Save this node on the map */
opal_list_append(&map->nodes, &rmaps_node->super);
proc->name = *name;
proc->rank = vpid;
proc->app_idx = app_idx;
/* Be sure to demarcate this slot as claimed for the node */
current_node->node_slots_inuse++;
/* see if this node is oversubscribed now */
if (current_node->node_slots_inuse >= current_node->node_slots) {
oversub = true;
} else {
oversub = false;
}
/* add the proc to the map */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(map, current_node->node_cellid,
current_node->node_name,
current_node->node_username,
oversub, proc))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(proc);
return rc;
}
/* Remove this node if it has reached its max number of allocatable slots OR it has
* reached the soft limit AND we are in a "no oversubscribe" state
*/
if ((0 != current_node->node_slots_max &&
current_node->node_slots_inuse >= current_node->node_slots_max) ||
(!orte_rmaps_base.oversubscribe &&
current_node->node_slots_inuse >= current_node->node_slots)) {
(!orte_rmaps_base.oversubscribe && oversub)) {
opal_list_remove_item(nodes, (opal_list_item_t*)current_node);
/* add it to the list of fully used nodes */
opal_list_append(fully_used_nodes, &current_node->super);

142
orte/mca/rmaps/base/rmaps_class_instances.h Обычный файл
Просмотреть файл

@ -0,0 +1,142 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*/
#ifndef ORTE_MCA_RMAPS_CLASS_INST_H
#define ORTE_MCA_RMAPS_CLASS_INST_H
/*
* includes
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/rmaps.h"
/*
* Functions for use solely within the RMAPS framework
*/
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
 * orte_mapped_proc_t
 *
 * Constructor: mark the process name as invalid and zero the numeric
 * fields so a freshly allocated object is in a well-defined state.
 * No destructor is needed - the object owns no allocated storage.
 */
static void orte_rmaps_mapped_proc_construct(orte_mapped_proc_t* ptr)
{
    /* no valid identity until the mapper assigns one */
    ptr->name.cellid = ORTE_CELLID_INVALID;
    ptr->name.jobid  = ORTE_JOBID_INVALID;
    ptr->name.vpid   = ORTE_VPID_INVALID;

    /* numeric fields start at zero */
    ptr->rank    = 0;
    ptr->app_idx = 0;
    ptr->pid     = 0;
}
OBJ_CLASS_INSTANCE(orte_mapped_proc_t,
                   opal_list_item_t,
                   orte_rmaps_mapped_proc_construct, NULL);
/*
 * orte_mapped_node_t
 *
 * Constructor: put every field in a well-defined state.  The cell id is
 * now initialized to ORTE_CELLID_INVALID (it was previously left
 * uninitialized), matching how orte_rmaps_mapped_proc_construct handles
 * its cellid.
 */
static void orte_rmaps_mapped_node_construct(orte_mapped_node_t* node)
{
    node->cell = ORTE_CELLID_INVALID;   /* was uninitialized before */
    node->nodename = NULL;
    node->username = NULL;
    node->daemon = NULL;                /* NULL => daemon not assigned yet */
    node->oversubscribed = false;
    OBJ_CONSTRUCT(&node->procs, opal_list_t);
}

/*
 * Destructor: release the owned strings, the daemon name, and every
 * proc object on the list before destructing the list itself.
 */
static void orte_rmaps_mapped_node_destruct(orte_mapped_node_t* node)
{
    opal_list_item_t* item;

    if (NULL != node->nodename) {
        free(node->nodename);
    }
    if (NULL != node->username) {
        free(node->username);
    }
    if (NULL != node->daemon) {
        free(node->daemon);
    }

    /* drain and release the proc list contents */
    while (NULL != (item = opal_list_remove_first(&node->procs))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&node->procs);
}

OBJ_CLASS_INSTANCE(orte_mapped_node_t,
                   opal_list_item_t,
                   orte_rmaps_mapped_node_construct,
                   orte_rmaps_mapped_node_destruct);
/*
 * orte_job_map_t
 *
 * Constructor/destructor for the job map.  NOTE: the struct definition
 * embeds "opal_object_t super;", so the class parent below must be
 * opal_object_t - declaring opal_list_item_t as the parent (as before)
 * would run the list-item constructor over memory beyond super,
 * corrupting the job/num_apps fields.
 */
static void orte_rmaps_job_map_construct(orte_job_map_t* map)
{
    map->job = ORTE_JOBID_INVALID;
    map->num_apps = 0;
    map->apps = NULL;
    OBJ_CONSTRUCT(&map->nodes, opal_list_t);
}

static void orte_rmaps_job_map_destruct(orte_job_map_t* map)
{
    orte_std_cntr_t i=0;
    opal_list_item_t* item;

    /* release each app_context, then the array that held them */
    for(i=0; i < map->num_apps; i++) {
        if (NULL != map->apps[i]) OBJ_RELEASE(map->apps[i]);
    }
    if (NULL != map->apps) {
        free(map->apps);
    }

    /* drain and release the node list contents */
    while (NULL != (item = opal_list_remove_first(&map->nodes))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&map->nodes);
}

OBJ_CLASS_INSTANCE(orte_job_map_t,
                   opal_object_t,   /* parent must match struct's super */
                   orte_rmaps_job_map_construct,
                   orte_rmaps_job_map_destruct);
/*
* external API functions will be documented in the mca/rmaps/rmaps.h file
*/
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -30,6 +30,7 @@
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/rmaps.h"
@ -67,15 +68,47 @@ OBJ_CLASS_DECLARATION(orte_rmaps_base_cmp_t);
/*
* Base functions
* Base API functions
*/
int orte_rmaps_base_map(orte_jobid_t job, char *desired_mapper);
/*
* NO_OP functions
* Map a job
* All calls to rmaps.map_job are routed through this function. This allows callers to
* the RMAPS framework to specify the particular mapper they wish to use.
*/
int orte_rmaps_base_map_no_op(orte_jobid_t job, char *desired_mapper);
int orte_rmaps_base_map_job(orte_jobid_t job, char *desired_mapper);
/*
* Get job map
* Retrieve the information for a job map from the registry and reassemble it into
* an job_map object. Memory for the job_map object and all of its elements is
* allocated by the function
*/
ORTE_DECLSPEC int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t job);
/*
* Get node map
* Retrieve the information for a job map from the registry and provide the info
* for the specified node
*/
ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell,
char *nodename, orte_jobid_t job);
/*
* Registry functions for maps
*/
/*
* Put job map
* Given a pointer to an orte_job_map_t, place the map's information on
* the registry. Info is entered into the containers for each individual process on
* the job's segment. Additionally, the function sets the INIT counter to the number
* of processes in the map, thus causing the INIT trigger to fire so that any
* attached subscriptions can be serviced.
*/
ORTE_DECLSPEC int orte_rmaps_base_put_job_map(orte_job_map_t *map);
/*
* communication functions
@ -89,22 +122,27 @@ void orte_rmaps_base_recv(int status, orte_process_name_t* sender,
/*
* Internal support functions
*/
ORTE_DECLSPEC int orte_rmaps_base_mapped_node_query(opal_list_t* mapping_list, opal_list_t* nodes_alloc, orte_jobid_t jobid);
ORTE_DECLSPEC int orte_rmaps_base_get_map(orte_jobid_t, opal_list_t* mapping);
ORTE_DECLSPEC int orte_rmaps_base_set_map(orte_jobid_t, opal_list_t* mapping);
ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_cellid_t, orte_jobid_t, const char*, opal_list_t* mapping);
/*
* Function to add a mapped_proc entry to a map
* Scans list of nodes on map to see if the specified one already
* exists - if so, just add this entry to that node's list of
* procs. If not, then add new node entry and put this proc
* on its list.
*/
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename,
char *username, bool oversubscribed, orte_mapped_proc_t *proc);
ORTE_DECLSPEC int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots);
ORTE_DECLSPEC int orte_rmaps_base_update_node_usage(opal_list_t *nodes);
ORTE_DECLSPEC int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots);
int orte_rmaps_base_update_node_usage(opal_list_t *nodes);
int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
orte_app_context_t *app,
opal_list_t *master_node_list,
orte_std_cntr_t *total_num_slots);
ORTE_DECLSPEC int orte_rmaps_base_claim_slot(orte_rmaps_base_map_t *map,
int orte_rmaps_base_claim_slot(orte_job_map_t *map,
orte_ras_node_t *current_node,
orte_jobid_t jobid, orte_vpid_t vpid,
int proc_index,
orte_std_cntr_t app_idx,
opal_list_t *nodes,
opal_list_t *fully_used_nodes);
@ -112,32 +150,32 @@ ORTE_DECLSPEC int orte_rmaps_base_claim_slot(orte_rmaps_base_map_t *map,
void orte_rmaps_base_std_obj_release(orte_data_value_t *value);
/* JOB_MAP */
int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_map(orte_rmaps_base_map_t *value1, orte_rmaps_base_map_t *value2, orte_data_type_t type);
int orte_rmaps_base_copy_map(orte_job_map_t **dest, orte_job_map_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_map(orte_job_map_t *value1, orte_job_map_t *value2, orte_data_type_t type);
int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type);
int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t *src, orte_data_type_t type);
int orte_rmaps_base_size_map(size_t *size, orte_rmaps_base_map_t *src, orte_data_type_t type);
int orte_rmaps_base_print_map(char **output, char *prefix, orte_job_map_t *src, orte_data_type_t type);
int orte_rmaps_base_size_map(size_t *size, orte_job_map_t *src, orte_data_type_t type);
int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type);
/* MAPPED_PROC */
int orte_rmaps_base_copy_mapped_proc(orte_rmaps_base_proc_t **dest, orte_rmaps_base_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_mapped_proc(orte_rmaps_base_proc_t *value1, orte_rmaps_base_proc_t *value2, orte_data_type_t type);
int orte_rmaps_base_copy_mapped_proc(orte_mapped_proc_t **dest, orte_mapped_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_mapped_proc(orte_mapped_proc_t *value1, orte_mapped_proc_t *value2, orte_data_type_t type);
int orte_rmaps_base_pack_mapped_proc(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type);
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_base_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_rmaps_base_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_mapped_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_mapped_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type);
/* MAPPED_NODE */
int orte_rmaps_base_copy_mapped_node(orte_rmaps_base_node_t **dest, orte_rmaps_base_node_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_mapped_node(orte_rmaps_base_node_t *value1, orte_rmaps_base_node_t *value2, orte_data_type_t type);
int orte_rmaps_base_copy_mapped_node(orte_mapped_node_t **dest, orte_mapped_node_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_mapped_node(orte_mapped_node_t *value1, orte_mapped_node_t *value2, orte_data_type_t type);
int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type);
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_base_node_t *src, orte_data_type_t type);
int orte_rmaps_base_size_mapped_node(size_t *size, orte_rmaps_base_node_t *src, orte_data_type_t type);
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_node_t *src, orte_data_type_t type);
int orte_rmaps_base_size_mapped_node(size_t *size, orte_mapped_node_t *src, orte_data_type_t type);
int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type);

Просмотреть файл

@ -69,6 +69,8 @@ orte_rmaps_base_component_t mca_rmaps_proxy_component = {
*/
static orte_rmaps_base_module_t orte_rmaps_proxy = {
orte_rmaps_proxy_map,
orte_rmaps_base_get_job_map,
orte_rmaps_base_get_node_map,
orte_rmaps_proxy_finalize
};

Просмотреть файл

@ -58,6 +58,18 @@
*/
typedef int (*orte_rmaps_base_module_map_fn_t)(orte_jobid_t job, char *desired_mapper);
/**
* Get the map of a job from the registry
*/
typedef int (*orte_rmaps_base_module_get_job_map_fn_t)(orte_job_map_t **map, orte_jobid_t job);
/**
* Get the map for a job on a specific node from the registry. Providing a jobid of
* ORTE_JOBID_WILDCARD will return the map of all processes on that node
*/
typedef int (*orte_rmaps_base_module_get_node_map_fn_t)(orte_mapped_node_t **node, orte_cellid_t cell,
char *nodename, orte_jobid_t job);
/**
* Cleanup module resources.
*/
@ -67,8 +79,12 @@ typedef int (*orte_rmaps_base_module_finalize_fn_t)(void);
* rmaps module version 1.3.0
*/
struct orte_rmaps_base_module_1_3_0_t {
/** Maping function pointer */
/** Mapping function pointer */
orte_rmaps_base_module_map_fn_t map_job;
/** Get job map pointer */
orte_rmaps_base_module_get_job_map_fn_t get_job_map;
/** Node map pointer */
orte_rmaps_base_module_get_node_map_fn_t get_node_map;
/** Finalization function pointer */
orte_rmaps_base_module_finalize_fn_t finalize;
};

Просмотреть файл

@ -24,11 +24,7 @@
#include "orte/orte_constants.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rmgr/rmgr_types.h"
/*
* General MAP types
@ -37,54 +33,50 @@
extern "C" {
#endif
/**** JOB_MAP OBJECTS ***/
/*
* Mapped process info for job_map
*/
struct orte_mapped_proc_t {
opal_list_item_t super;
orte_process_name_t name; /* process name */
orte_std_cntr_t rank; /* process rank */
orte_std_cntr_t app_idx; /* index of app_context for this process */
pid_t pid;
};
typedef struct orte_mapped_proc_t orte_mapped_proc_t;
OBJ_CLASS_DECLARATION(orte_mapped_proc_t);
/*
* Mapping of nodes to process ranks.
*/
struct orte_rmaps_base_node_t {
struct orte_mapped_node_t {
opal_list_item_t super;
orte_ras_node_t* node;
opal_list_t node_procs; /* list of rmaps_base_proc_t */
};
typedef struct orte_rmaps_base_node_t orte_rmaps_base_node_t;
OBJ_CLASS_DECLARATION(orte_rmaps_base_node_t);
/*
* Mapping of a process rank to a specific node.
orte_cellid_t cell; /* cell where this node is located */
char *nodename; /* name of node */
char *username;
orte_process_name_t *daemon; /* name of the daemon on this node
* NULL => daemon not assigned yet
*/
struct orte_rmaps_base_proc_t {
opal_list_item_t super;
char *app; /* name of executable */
orte_rmaps_base_node_t* proc_node;
orte_process_name_t proc_name;
orte_std_cntr_t proc_rank;
pid_t pid; /* PLS-assigned pid */
pid_t local_pid; /* pid found by local process */
bool oversubscribed; /* whether or not the #procs > #processors */
opal_list_t procs; /* list of mapped_proc objects on this node */
};
typedef struct orte_rmaps_base_proc_t orte_rmaps_base_proc_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_proc_t);
typedef struct orte_mapped_node_t orte_mapped_node_t;
OBJ_CLASS_DECLARATION(orte_mapped_node_t);
/*
* Structure that represents the mapping of an application to an
* Structure that represents the mapping of a job to an
* allocated set of resources.
*/
struct orte_rmaps_base_map_t {
opal_list_item_t super;
orte_app_context_t *app;
orte_rmaps_base_proc_t** procs;
orte_std_cntr_t num_procs;
opal_list_t nodes; /* list of rmaps_base_node_t */
struct orte_job_map_t {
opal_object_t super;
orte_jobid_t job;
orte_std_cntr_t num_apps; /* number of app_contexts */
orte_app_context_t **apps; /* the array of app_contexts for this job */
opal_list_t nodes; /* list of mapped_node_t */
};
typedef struct orte_rmaps_base_map_t orte_rmaps_base_map_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_map_t);
typedef struct orte_job_map_t orte_job_map_t;
OBJ_CLASS_DECLARATION(orte_job_map_t);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -31,6 +31,7 @@
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
#include "opal/util/trace.h"
#include "opal/util/show_help.h"
#include "opal/util/argv.h"
@ -56,7 +57,7 @@ static opal_list_t fully_used_nodes;
*/
static int map_app_by_node(
orte_app_context_t* app,
orte_rmaps_base_map_t* map,
orte_job_map_t* map,
orte_jobid_t jobid,
orte_vpid_t vpid_start,
opal_list_t* nodes,
@ -67,6 +68,7 @@ static int map_app_by_node(
opal_list_item_t *next;
orte_ras_node_t *node;
OPAL_TRACE(2);
/* This loop continues until all procs have been mapped or we run
out of resources. We determine that we have "run out of
@ -110,7 +112,7 @@ static int map_app_by_node(
/* Allocate a slot on this node */
node = (orte_ras_node_t*) cur_node_item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, num_alloc,
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, app->idx,
nodes, max_used_nodes))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -121,8 +123,6 @@ static int map_app_by_node(
cur_node_item = next;
}
map->num_procs = num_alloc;
return ORTE_SUCCESS;
}
@ -133,7 +133,7 @@ static int map_app_by_node(
*/
static int map_app_by_slot(
orte_app_context_t* app,
orte_rmaps_base_map_t* map,
orte_job_map_t* map,
orte_jobid_t jobid,
orte_vpid_t vpid_start,
opal_list_t* nodes,
@ -145,6 +145,7 @@ static int map_app_by_slot(
orte_ras_node_t *node;
opal_list_item_t *next;
OPAL_TRACE(2);
/* This loop continues until all procs have been mapped or we run
out of resources. We determine that we have "run out of
@ -195,7 +196,7 @@ static int map_app_by_slot(
num_slots_to_take = (node->node_slots == 0) ? 1 : node->node_slots;
for( i = 0; i < num_slots_to_take; ++i) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, num_alloc,
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, app->idx,
nodes, max_used_nodes))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop
@ -223,8 +224,6 @@ static int map_app_by_slot(
}
map->num_procs = num_alloc;
return ORTE_SUCCESS;
}
@ -235,11 +234,10 @@ static int map_app_by_slot(
static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
{
orte_app_context_t** context, *app;
orte_rmaps_base_map_t* map;
orte_std_cntr_t i, num_context;
orte_app_context_t *app;
orte_job_map_t* map;
orte_std_cntr_t i;
opal_list_t master_node_list, mapped_node_list, max_used_nodes, *working_node_list;
opal_list_t mapping;
opal_list_item_t *item, *item2;
orte_ras_node_t *node, *node2;
orte_vpid_t vpid_start, job_vpid_start=0;
@ -247,8 +245,20 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
int rc;
bool bynode = true, modify_app_context = false;
OPAL_TRACE(1);
/* create the map object */
map = OBJ_NEW(orte_job_map_t);
if (NULL == map) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* set the jobid */
map->job = jobid;
/* query for the application context and allocated nodes */
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &context, &num_context))) {
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &(map->apps), &(map->num_apps)))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -271,11 +281,6 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
return rc;
}
/* construct a mapping for the job - the list will hold mappings for each
* application context
*/
OBJ_CONSTRUCT(&mapping, opal_list_t);
/** initialize the cur_node_item to point to the first node in the list */
cur_node_item = opal_list_get_first(&master_node_list);
@ -298,30 +303,20 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
/** construct a list to hold any nodes involved in a user-specified mapping */
OBJ_CONSTRUCT(&mapped_node_list, opal_list_t);
for(i=0; i<num_context; i++) {
app = context[i];
for(i=0; i < map->num_apps; i++) {
app = map->apps[i];
/** if the number of processes wasn't specified, then we know there can be only
* one app_context allowed in the launch, and that we are to launch it across
* all available slots. We'll double-check the single app_context rule first
*/
if (0 == app->num_procs && 1 < num_context) {
if (0 == app->num_procs && 1 < map->num_apps) {
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np",
true, num_context, NULL);
true, map->num_apps, NULL);
ORTE_ERROR_LOG(ORTE_ERR_INVALID_NUM_PROCS);
return ORTE_ERR_INVALID_NUM_PROCS;
}
/** create a map for this app_context */
map = OBJ_NEW(orte_rmaps_base_map_t);
if(NULL == map) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/** add it to the list of mappings for the job */
opal_list_append(&mapping, &map->super);
if ( 0 < app->num_map ) {
/** If the user has specified a mapping for this app_context, then we
* create a working node list that contains only those nodes.
@ -355,15 +350,6 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
}
}
map->app = app;
map->procs = (orte_rmaps_base_proc_t**)malloc(sizeof(orte_rmaps_base_proc_t*) * app->num_procs);
if(NULL == map->procs) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* allocate a vpid range for this app within the job */
if(ORTE_SUCCESS != (rc = orte_ns.reserve_range(jobid, app->num_procs, &vpid_start))) {
ORTE_ERROR_LOG(rc);
@ -467,7 +453,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
}
/* save mapping to the registry */
if(ORTE_SUCCESS != (rc = orte_rmaps_base_set_map(jobid, &mapping))) {
if(ORTE_SUCCESS != (rc = orte_rmaps_base_put_job_map(map))) {
goto cleanup;
}
@ -493,7 +479,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
processes
*/
if (modify_app_context) {
if (ORTE_SUCCESS != (rc = orte_rmgr.store_app_context(jobid, context, 1))) {
if (ORTE_SUCCESS != (rc = orte_rmgr.store_app_context(jobid, map->apps, 1))) {
ORTE_ERROR_LOG(rc);
}
}
@ -505,11 +491,6 @@ cleanup:
}
OBJ_DESTRUCT(&master_node_list);
while(NULL != (item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping);
OBJ_DESTRUCT(&max_used_nodes);
OBJ_DESTRUCT(&fully_used_nodes);
OBJ_DESTRUCT(&mapped_node_list);
@ -526,6 +507,8 @@ static int orte_rmaps_rr_finalize(void)
orte_rmaps_base_module_t orte_rmaps_round_robin_module = {
orte_rmaps_rr_map,
orte_rmaps_base_get_job_map,
orte_rmaps_base_get_node_map,
orte_rmaps_rr_finalize
};

Просмотреть файл

@ -62,6 +62,7 @@
#define ORTE_NODE_ALLOC_KEY "orte-node-alloc"
#define ORTE_NODE_BOOTPROXY_KEY "orte-node-bootproxy"
#define ORTE_NODE_USERNAME_KEY "orte-node-username"
#define ORTE_NODE_OVERSUBSCRIBED_KEY "orte-node-oversubscribed"
#define ORTE_JOB_APP_CONTEXT_KEY "orte-job-app-context"
#define ORTE_JOB_SLOTS_KEY "orte-job-slots" /**< number of procs in job */
#define ORTE_JOB_VPID_START_KEY "orte-job-vpid-start"

Просмотреть файл

@ -66,7 +66,7 @@ extern char **environ;
#include "opal/mca/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/runtime/runtime.h"
#include "totalview.h"
@ -333,8 +333,11 @@ void orte_totalview_init_before_spawn(void)
*/
void orte_totalview_init_after_spawn(orte_jobid_t jobid)
{
opal_list_t list_of_resource_maps;
opal_list_item_t *item;
orte_job_map_t *map;
opal_list_item_t *item, *item2;
orte_mapped_node_t *node;
orte_mapped_proc_t *proc;
orte_app_context_t *appctx;
orte_std_cntr_t i;
int rc;
@ -364,23 +367,18 @@ void orte_totalview_init_after_spawn(orte_jobid_t jobid)
MPIR_debug_state = 1;
OBJ_CONSTRUCT(&list_of_resource_maps, opal_list_t);
/* Get the resource map for this job */
/* Get a list of the resource maps for this job */
rc = orte_rmaps_base_get_map(jobid, &list_of_resource_maps);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
opal_output(0, "Error: Can't get list of resource maps\n");
opal_output(0, "Error: Can't get resource map\n");
ORTE_ERROR_LOG(rc);
}
/* find the total number of processes in the job */
for (item = opal_list_get_first(&list_of_resource_maps);
item != opal_list_get_end(&list_of_resource_maps);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t *map = (orte_rmaps_base_map_t*) item;
MPIR_proctable_size += map->num_procs;
for (i=0; i < map->num_apps; i++) {
MPIR_proctable_size += map->apps[i]->num_procs;
}
/* allocate MPIR_proctable */
@ -389,27 +387,34 @@ void orte_totalview_init_after_spawn(orte_jobid_t jobid)
MPIR_proctable_size);
if (MPIR_proctable == NULL) {
opal_output(0, "Error: Out of memory\n");
OBJ_DESTRUCT(&list_of_resource_maps);
OBJ_RELEASE(map);
}
/* initialize MPIR_proctable */
for (item = opal_list_get_first(&list_of_resource_maps);
item != opal_list_get_end(&list_of_resource_maps);
i=0;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t *map = (orte_rmaps_base_map_t*) item;
for (i = 0; i < map->num_procs; i++) {
orte_rmaps_base_proc_t *proc = map->procs[i];
MPIR_proctable[i].host_name = proc->proc_node->node->node_name;
node = (orte_mapped_node_t*)item;
for (item2 = opal_list_get_first(&node->procs);
item2 != opal_list_get_end(&node->procs);
item2 = opal_list_get_next(item2)) {
proc = (orte_mapped_proc_t*)item2;
appctx = map->apps[proc->app_idx];
MPIR_proctable[i].host_name = strdup(node->nodename);
MPIR_proctable[i].executable_name =
opal_os_path( false, map->app->cwd, proc->app, NULL );
MPIR_proctable[i].pid = proc->local_pid;
opal_os_path( false, appctx->cwd, appctx->app, NULL );
MPIR_proctable[i].pid = proc->pid;
i++;
}
}
OBJ_DESTRUCT(&list_of_resource_maps);
OBJ_RELEASE(map);
}
if (orte_debug_flag) {
dump();
}