
Bring the map fixes into the main trunk. This should fix several problems, including the multiple app_context issue.

I have tested on rsh, slurm, bproc, and tm. Bproc continues to have a problem (will be asking for help there).

Gridengine compiles but I cannot test it (I believe it will likely run).

Poe and xgrid compile to the extent they can without the proper include files.

This commit was SVN r12059.
This commit is contained in:
Ralph Castain 2006-10-07 15:45:24 +00:00
parent 5dbe5c7442
commit ae79894bad
34 changed files with 2176 additions and 2727 deletions
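The core of this change replaces the old per-app-context mapping list (orte_rmaps_base_get_map / orte_rmaps_base_map_t) with a single consolidated job map. As a rough sketch of the pattern the launchers below now follow — all type and field names are taken from this diff, the header paths assume an in-tree ORTE build, and error handling is abbreviated — a PLS module walks the new map roughly like this:

/* Sketch only (not part of the commit): consuming the consolidated job map. */
#include "opal/class/opal_list.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/rmaps.h"

static int example_walk_map(orte_jobid_t jobid)
{
    orte_job_map_t *map;
    opal_list_item_t *item, *item2;
    int rc, i;

    /* one call now returns the whole map: app contexts plus mapped nodes */
    if (ORTE_SUCCESS != (rc = orte_rmaps.get_job_map(&map, jobid))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* app contexts are an array on the map instead of one map object per context */
    for (i = 0; i < map->num_apps; i++) {
        /* map->apps[i]->app, ->argv, ->env, ->prefix_dir are available here */
    }

    /* each mapped node carries its own list of procs */
    for (item = opal_list_get_first(&map->nodes);
         item != opal_list_get_end(&map->nodes);
         item = opal_list_get_next(item)) {
        orte_mapped_node_t *node = (orte_mapped_node_t*)item;   /* node->nodename, node->cell */
        for (item2 = opal_list_get_first(&node->procs);
             item2 != opal_list_get_end(&node->procs);
             item2 = opal_list_get_next(item2)) {
            orte_mapped_proc_t *proc = (orte_mapped_proc_t*)item2;
            (void)proc;   /* proc->name and proc->app_idx identify the process */
        }
        (void)node;
    }
    return ORTE_SUCCESS;
}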

View file

@ -259,7 +259,7 @@ int orte_errmgr_bproc_register_job(orte_jobid_t job)
}
/* send the request */
if (0 > orte_rml.send_buffer(orte_errmgr_proxy_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) {
if (0 > orte_rml.send_buffer(orte_errmgr_bproc_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(cmd);
return ORTE_ERR_COMM_FAILURE;
@ -274,7 +274,7 @@ int orte_errmgr_bproc_register_job(orte_jobid_t job)
}
/* enter a blocking receive until we hear back */
if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_ERRMGR)) {
if (0 > orte_rml.recv_buffer(orte_errmgr_bproc_globals.replica, answer, ORTE_RML_TAG_ERRMGR)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;

View file

@ -65,9 +65,14 @@ int orte_ns_base_print_name(char **output, char *prefix, orte_process_name_t *na
/* set default result */
*output = NULL;
asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: [%lu,%lu,%lu]",
if (NULL == name) {
asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: NULL",
(NULL == prefix ? " " : prefix));
} else {
asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: [%lu,%lu,%lu]",
(NULL == prefix ? " " : prefix), (unsigned long)name->cellid,
(unsigned long)name->jobid, (unsigned long)name->vpid);
}
return ORTE_SUCCESS;
}
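The print_name fix above simply guards against a NULL name before formatting. A minimal, hypothetical illustration of the two output forms (field names and format strings are taken from the diff; the setup around the call is assumed):

/* Hypothetical illustration of the two branches above. */
char *out = NULL;
orte_process_name_t name = { .cellid = 0, .jobid = 1, .vpid = 2 };

orte_ns_base_print_name(&out, NULL, &name);
/* -> " Data type: ORTE_PROCESS_NAME\tData Value: [0,1,2]" */
free(out);

orte_ns_base_print_name(&out, NULL, NULL);
/* -> " Data type: ORTE_PROCESS_NAME\tData Value: NULL" */
free(out);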

View file

@ -137,15 +137,3 @@ int orte_odls_bproc_component_close(void)
OBJ_DESTRUCT(&mca_odls_bproc_component.children);
return ORTE_SUCCESS;
}
int orte_odls_bproc_component_finalize(void)
{
opal_list_item_t *item;
/* cleanup state */
while (NULL != (item = opal_list_remove_first(&mca_odls_bproc_component.children))) {
OBJ_RELEASE(item);
}
return ORTE_SUCCESS;
}

View file

@ -53,7 +53,7 @@ OBJ_CLASS_INSTANCE(orte_pls_daemon_info_t, /* type name */
/*
* Store the active daemons for a job
*/
int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
int orte_pls_base_store_active_daemons(opal_list_t *daemons)
{
orte_pls_daemon_info_t *dmn;
opal_list_item_t *item;
@ -63,6 +63,10 @@ int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
/* determine the number of daemons */
num_daemons = opal_list_get_size(daemons);
if (0 == num_daemons) {
return ORTE_SUCCESS;
}
/* since each daemon gets recorded in a separate node's container,
* we need to allocate space for num_daemons value objects
@ -74,15 +78,6 @@ int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
}
memset(values, 0, num_daemons*sizeof(orte_gpr_value_t*)); /* NULL the array */
/* setup the key */
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, job))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
return rc;
}
asprintf(&key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string);
free(jobid_string);
/* loop through the values and the list and create all the value objects */
item = opal_list_get_first(daemons);
for (i=0; i < num_daemons; i++) {
@ -102,6 +97,15 @@ int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
goto CLEANUP;
}
/* setup the key */
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, dmn->active_job))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
return rc;
}
asprintf(&key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string);
free(jobid_string);
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[0]), key, ORTE_NAME, dmn->name))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
@ -140,7 +144,10 @@ int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job)
NULL
};
orte_cellid_t *cell;
char *nodename;
orte_process_name_t *name;
orte_pls_daemon_info_t *dmn;
bool found_name, found_node, found_cell;
int rc;
/* setup the key */
@ -164,27 +171,29 @@ int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job)
/* loop through the answers and construct the list */
for (i=0; i < cnt; i++) {
/* each container should have only one set of values */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
if (NULL == dmn) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto CLEANUP;
}
/* for systems such as bproc, the node segment holds containers
* for nodes that we may not have launched upon. Each container
* will send us back a value object, so we have to ensure here
* that we only create daemon objects on the list for those nodes
* that DO provide a valid object
*/
found_name = found_node = found_cell = false;
for (j=0; j < values[i]->cnt; j++) {
kv = values[i]->keyvals[j];
if (0 == strcmp(kv->key, keys[0])) {
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), kv->value->data, ORTE_NAME))) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&name, kv->value, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
found_name = true;
continue;
}
if (0 == strcmp(kv->key, ORTE_NODE_NAME_KEY)) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->nodename), kv->value->data, ORTE_STRING))) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&nodename, kv->value, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
found_node = true;
continue;
}
if (0 == strcmp(kv->key, ORTE_CELLID_KEY)) {
@ -192,12 +201,32 @@ int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job)
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
dmn->cell = *cell;
found_cell = true;
continue;
}
}
/* add this daemon to the list */
opal_list_append(daemons, &dmn->super);
/* if we found everything, then this is a valid entry - create
* it and add it to the list
*/
if (found_name && found_node && found_cell) {
dmn = OBJ_NEW(orte_pls_daemon_info_t);
if (NULL == dmn) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(dmn);
goto CLEANUP;
}
dmn->cell = *cell;
if (NULL != nodename) {
dmn->nodename = strdup(nodename);
}
/* add this daemon to the list */
opal_list_append(daemons, &dmn->super);
}
OBJ_RELEASE(values[i]);
}
@ -212,5 +241,23 @@ CLEANUP:
}
/*
* Retrieve the active daemon(s) for a specific node
* Remove a daemon from the world of active daemons
*/
int orte_pls_base_remove_daemon(orte_pls_daemon_info_t *info)
{
opal_list_t daemons;
int rc;
OBJ_CONSTRUCT(&daemons, opal_list_t);
/* We actually don't want to do this - instead, we need to do a registry
* delete function call targeting this entry
*/
if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, info->active_job))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* find this item in the list */
return ORTE_SUCCESS;
}
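With the jobid argument dropped from store_active_daemons, each daemon entry now carries its own active_job, which is used to build the registry key. A hedged sketch of how a launcher builds and stores the list under the new signature (the pattern is copied from the gridengine and rsh changes later in this commit; jobid, node, and name are assumed to exist in the surrounding launcher, and error paths are omitted):

/* Sketch only: record one daemon per node under the new API. */
opal_list_t daemons;
orte_pls_daemon_info_t *dmn;

OBJ_CONSTRUCT(&daemons, opal_list_t);

dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;                  /* jobid now lives on each entry   */
dmn->cell = node->cell;                   /* cell and nodename of the daemon */
dmn->nodename = strdup(node->nodename);
orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME);
opal_list_append(&daemons, &dmn->super);

/* jobid argument is gone - the key is built from dmn->active_job */
orte_pls_base_store_active_daemons(&daemons);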

View file

@ -58,6 +58,7 @@ int orte_pls_base_orted_exit(opal_list_t *daemons)
item = opal_list_get_next(item)) {
dmn = (orte_pls_daemon_info_t*)item;
opal_output(0, "sending exit cmd to daemon [%ld,%ld,%ld]", ORTE_NAME_ARGS(dmn->name));
if (0 > orte_rml.send_buffer(dmn->name, &cmd, ORTE_RML_TAG_PLS_ORTED, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_DESTRUCT(&cmd);

View file

@ -80,8 +80,9 @@ extern "C" {
int orte_pls_base_orted_add_local_procs(opal_list_t *daemons, orte_gpr_notify_data_t *ndat);
int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job);
int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job);
int orte_pls_base_store_active_daemons(opal_list_t *daemons);
int orte_pls_base_remove_daemon(orte_pls_daemon_info_t *info);
/*
* communications utilities
*/

View file

@ -60,17 +60,14 @@
#include "orte/mca/ns/ns.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/ras/base/base.h"
#include "orte/mca/ras/ras.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/smr/smr.h"
#include "orte/runtime/orte_wait.h"
#include "orte/runtime/runtime.h"
/* remove this when moved to 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/pls/base/pls_private.h"
#include "pls_bproc.h"
@ -104,7 +101,7 @@ orte_pls_base_module_t orte_pls_bproc_module = {
};
static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
static int orte_pls_bproc_node_array(orte_job_map_t* map,
int ** node_array, int * node_array_len);
static int orte_pls_bproc_node_list(int * node_array, int node_array_len,
int ** node_list, int * num_nodes,
@ -123,12 +120,12 @@ static int bproc_vexecmove(int nnodes, int *nodes, int *pids, const char *cmd,
#endif
static void orte_pls_bproc_setup_env(char *** env);
static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
int ** node_arrays, int * node_array_lens,
int num_contexts, int num_procs,
orte_job_map_t *map,
orte_vpid_t global_vpid_start,
orte_jobid_t jobid, int* num_daemons);
static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
orte_rmaps_base_map_t* map, int num_processes,
orte_job_map_t* map, int num_processes,
int num_slots,
orte_vpid_t vpid_start,
orte_vpid_t global_vpid_start,
int app_context,
@ -144,7 +141,7 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
* @retval >=0 the number of processes
* @retval <0 orte err
*/
static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
static int orte_pls_bproc_node_array(orte_job_map_t* map,
int ** node_array, int * node_array_len) {
opal_list_item_t* item;
int num_procs = 0;
@ -156,8 +153,8 @@ static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
if(*node_array_len < atol(((orte_rmaps_base_node_t*)item)->node->node_name)) {
*node_array_len = atol(((orte_rmaps_base_node_t*)item)->node->node_name);
if(*node_array_len < atol(((orte_mapped_node_t*)item)->nodename)) {
*node_array_len = atol(((orte_mapped_node_t*)item)->nodename);
}
}
(*node_array_len)++;
@ -172,9 +169,9 @@ static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_node_t* node = (orte_rmaps_base_node_t*)item;
num_on_node = opal_list_get_size(&node->node_procs);
(*node_array)[atol(node->node->node_name)] += num_on_node;
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
num_on_node = opal_list_get_size(&node->procs);
(*node_array)[atol(node->nodename)] += num_on_node;
num_procs += num_on_node;
}
return num_procs;
@ -493,14 +490,12 @@ static void orte_pls_bproc_setup_env(char *** env)
* @retval error
*/
static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
int ** node_arrays, int * node_array_lens,
int num_contexts, int num_procs,
orte_job_map_t *map,
orte_vpid_t global_vpid_start,
orte_jobid_t jobid, int *num_launched) {
int * daemon_list = NULL;
int num_nodes = 0;
int num_daemons = 0;
int rc, i, j;
int rc, i;
int * pids = NULL;
int argc;
char ** argv = NULL;
@ -524,26 +519,25 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
*/
OBJ_CONSTRUCT(&daemons, opal_list_t);
/* find the length of the longest node array */
for(i = 0; i < num_contexts; i++) {
if(node_array_lens[i] > num_nodes) {
num_nodes = node_array_lens[i];
}
}
if(NULL == (daemon_list = (int*)malloc(sizeof(int) * num_nodes))) {
/* get the number of nodes in this job and allocate an array for
* their names so we can pass that to bproc - populate the list
* with the node names
*/
num_daemons = opal_list_get_size(&map->nodes);
if(NULL == (daemon_list = (int*)malloc(sizeof(int) * num_daemons))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup;
}
/* create a list of all the nodes that need daemons, which is all the nodes
* that will have at least 1 process */
for(i = 0; i < num_nodes; i++) {
for(j = 0; j < num_contexts; j++) {
if(i < node_array_lens[j] && 0 < *(node_arrays[j] + i)) {
daemon_list[num_daemons++] = i;
break;
}
}
i = 0;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_mapped_node_t *node = (orte_mapped_node_t*)item;
daemon_list[i++] = atoi(node->nodename);
}
/* allocate storage to save the daemon pids */
if(NULL == (pids = (int*)malloc(sizeof(int) * num_daemons))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup;
@ -574,7 +568,7 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
/* set up the base environment so the daemons can get their names once launched */
rc = orte_ns_nds_bproc_put(cellid, daemon_jobid, daemon_vpid_start,
global_vpid_start, num_procs, envp);
global_vpid_start, num_daemons, envp);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -695,7 +689,7 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
}
}
/* store the daemon info */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
*num_launched = num_daemons;
@ -857,19 +851,20 @@ orte_pls_bproc_monitor_nodes(void)
* @retval error
*/
static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
orte_rmaps_base_map_t* map, int num_processes,
orte_job_map_t* map, int num_processes, int num_slots,
orte_vpid_t vpid_start,
orte_vpid_t global_vpid_start,
int app_context, int * node_array,
int node_array_len) {
int * node_list = NULL;
int num_nodes, num_slots, cycle;
int num_nodes, cycle;
int rc, i, j, stride;
int * pids = NULL;
char * var, * param;
orte_process_name_t * proc_name;
struct bproc_io_t bproc_io[3];
orte_rmaps_base_node_t *node;
char **env;
int dbg;
OPAL_TRACE(1);
@ -877,25 +872,16 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* point to the env array for this app_context */
env = map->apps[app_context]->env;
/* set up app context */
asprintf(&param, "%d", app_context);
var = mca_base_param_environ_variable("pls", "bproc", "app_context");
opal_setenv(var, param, true, &map->app->env);
opal_setenv(var, param, true, &env);
free(param);
free(var);
/* in order for bproc processes to properly compute their name,
* we have to provide them with info on the number of slots
* on each node (which is a constant in bproc). We will pass this
* in an appropriate parameter which we set for each app_context
*/
node = (orte_rmaps_base_node_t*)opal_list_get_first(&map->nodes);
if (NULL == node) {
ORTE_ERROR_LOG(ORTE_ERROR);
return ORTE_ERROR;
}
num_slots = node->node->node_slots;
/* set the vpid-to-vpid stride based on the mapping mode */
if (mca_pls_bproc_component.bynode) {
@ -914,7 +900,7 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
/* and push that value into the process' environment */
asprintf(&param, "%ld", (long)stride);
var = mca_base_param_environ_variable("pls", "bproc", "stride");
opal_setenv(var, param, true, &map->app->env);
opal_setenv(var, param, true, &env);
free(param);
free(var);
@ -943,11 +929,14 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
ORTE_ERROR_LOG(rc);
goto cleanup;
}
fprintf(stderr, "launching app %s\n", map->apps[app_context]->app);
while(0 != num_nodes) {
fprintf(stderr, "\tlaunching cycle %d\n", i);
for (dbg=0; dbg<num_nodes; dbg++) fprintf(stderr, "\t\tlaunching on node %d\n", node_list[dbg]);
/* setup environment so the procs can figure out their names */
rc = orte_ns_nds_bproc_put(cellid, jobid, vpid_start, global_vpid_start,
num_processes, &map->app->env);
num_processes, &env);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -962,21 +951,22 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
opal_output(0, "pls_bproc: launching %d processes", num_nodes);
}
rc = bproc_vexecmove_io(num_nodes, node_list, pids, bproc_io, 3,
map->app->app, map->app->argv, map->app->env);
map->apps[app_context]->app,
map->apps[app_context]->argv, env);
if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "pls_bproc: %d processes launched. First pid: %d",
rc, *pids);
}
if(rc != num_nodes) {
opal_show_help("help-pls-bproc.txt", "proc-launch-number", true,
num_nodes, rc, map->app->app);
num_nodes, rc, map->apps[app_context]->app);
rc = ORTE_ERROR;
goto cleanup;
}
for(j = 0; j < num_nodes; j++) {
if(0 >= pids[j]) {
opal_show_help("help-pls-bproc.txt", "proc-launch-bad-pid", true,
node_list[j], pids[j], errno, map->app->app);
node_list[j], pids[j], errno, map->apps[app_context]->app);
rc = ORTE_ERROR;
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -1056,10 +1046,10 @@ cleanup:
* @retval error
*/
int orte_pls_bproc_launch(orte_jobid_t jobid) {
opal_list_item_t* item, *item2;
opal_list_t mapping;
opal_list_item_t* item;
orte_cellid_t cellid;
orte_rmaps_base_map_t* map;
orte_job_map_t* map;
orte_mapped_node_t *map_node;
orte_vpid_t vpid_launch;
orte_vpid_t vpid_range;
orte_vpid_t vpid_start;
@ -1068,11 +1058,13 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
int ** node_array = NULL;
int * node_array_len = NULL;
int num_processes = 0;
int num_daemons = 0;
int context = 0;
int j;
int num_daemons;
int num_slots;
int context;
int i, j;
orte_std_cntr_t idx;
char cwd_save[OMPI_PATH_MAX + 1];
orte_ras_node_t *ras_node;
OPAL_TRACE(1);
@ -1089,12 +1081,12 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
}
cwd_save[sizeof(cwd_save) - 1] = '\0';
/* query for the application context and allocated nodes */
OBJ_CONSTRUCT(&mapping, opal_list_t);
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_map(jobid, &mapping))) {
/* get the job map */
if(ORTE_SUCCESS != (rc = orte_rmaps.get_job_map(&map, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(ORTE_SUCCESS != (rc = orte_rmgr.get_vpid_range(jobid, &vpid_start,
&vpid_range))) {
ORTE_ERROR_LOG(rc);
@ -1104,86 +1096,80 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
/* get the cellid */
cellid = orte_process_info.my_name->cellid;
/* do a large lock so the processes will not decrement the process count
* until we are done launching */
/* check all of the app_contexts for sanity */
for (i=0; i < map->num_apps; i++) {
/* Check that the cwd is sane. We have to chdir there in
to check the executable, because the executable could
have been specified as a relative path to the wdir */
rc = orte_rmgr.check_context_cwd(map->apps[i], true);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
for (item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
item = opal_list_get_next(item)) {
orte_std_cntr_t i;
map = (orte_rmaps_base_map_t*) item;
orte_dss.dump(0, map, ORTE_JOB_MAP);
for (i = 0; i < map->num_procs; ++i) {
orte_app_context_t *context = map->app;
/* Check that the app exists and is executable */
rc = orte_rmgr.check_context_app(map->apps[i]);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
/* Check that the cwd is sane. We have to chdir there in
to check the executable, because the executable could
have been specified as a relative path to the wdir */
rc = orte_rmgr.check_context_cwd(context, true);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
/* Check that the app exists and is executable */
rc = orte_rmgr.check_context_app(context);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
/* Return to the original dir */
if (0 != chdir(cwd_save)) {
rc = ORTE_ERR_IN_ERRNO;
goto cleanup;
}
/* Return to the original dir */
if (0 != chdir(cwd_save)) {
rc = ORTE_ERR_IN_ERRNO;
goto cleanup;
}
}
/* For Bproc, we need to know how many slots were allocated on each
* node so the spawned processes can compute their name. Only Bproc
* needs to do this, so we choose not to modify the mapped_node struct
* to hold this info - bproc can go get it.
*
* Since Bproc also requires that the slots allocated on each node
* be the same, we really only need to lookup a single node. So grab
* the data for the first node on the map
*/
map_node = (orte_mapped_node_t*)opal_list_get_first(&map->nodes);
if (NULL == (ras_node = orte_ras.node_lookup(map_node->cell, map_node->nodename))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
goto cleanup;
}
num_slots = ras_node->node_slots;
OBJ_RELEASE(ras_node);
if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "pls_bproc: --- starting to launch procs ---");
}
/* create an array to hold the pointers to the node arrays for each app
* context. Also, create an array to hold the lengths of the node arrays */
node_array = malloc(opal_list_get_size(&mapping) * sizeof(int *));
node_array_len = malloc(opal_list_get_size(&mapping) * sizeof(int *));
node_array = malloc(map->num_apps * sizeof(int *));
node_array_len = malloc(map->num_apps * sizeof(int *));
/* for each application context - create a node array and setup its env */
for(item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
item = opal_list_get_next(item)) {
map = (orte_rmaps_base_map_t*)item;
rc = orte_pls_bproc_node_array(map, &node_array[context],
&node_array_len[context]);
for(i=0; i < map->num_apps; i++) {
rc = orte_pls_bproc_node_array(map, &node_array[i],
&node_array_len[i]);
if(0 > rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
orte_pls_bproc_setup_env(&map->app->env);
orte_pls_bproc_setup_env(&map->apps[i]->env);
num_processes += rc;
context++;
}
/* save the active node names */
idx = 0;
for (item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t *map = (orte_rmaps_base_map_t*) item;
for (item2 = opal_list_get_first(&map->nodes);
item2 != opal_list_get_end(&map->nodes);
item2 = opal_list_get_next(item2)) {
orte_ras_node_t* node = (orte_ras_node_t*) item2;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_mapped_node_t* node = (orte_mapped_node_t*) item;
rc = orte_pointer_array_add(&idx, mca_pls_bproc_component.active_node_names,
strdup(node->node_name));
}
rc = orte_pointer_array_add(&idx, mca_pls_bproc_component.active_node_names,
strdup(node->nodename));
}
/* setup subscription for each node so we can detect
when the node's state changes, usefull for aborting when
when the node's state changes, useful for aborting when
a bproc node up and dies */
rc = orte_pls_bproc_monitor_nodes();
@ -1193,9 +1179,11 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
goto cleanup;
}
/* launch the daemons on all the nodes which have processes assign to them */
rc = orte_pls_bproc_launch_daemons(cellid, &map->app->env, node_array,
node_array_len, context, num_processes,
/* launch the daemons on all the nodes which have processes assigned to them.
* We need to send along an appropriate environment for the daemons. Since
* there must be at least ONE app_context, we can just take that one
*/
rc = orte_pls_bproc_launch_daemons(cellid, &map->apps[0]->env, map,
vpid_start, jobid, &num_daemons);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
@ -1238,44 +1226,35 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
}
}
context = 0;
vpid_launch = vpid_start;
opal_output(0, "launching apps");
/* for each application context launch the app */
for(item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
item = opal_list_get_next(item)) {
map = (orte_rmaps_base_map_t*)item;
rc = orte_rmgr.check_context_cwd(map->app, true);
for(context=0; context < map->num_apps; context++) {
rc = orte_rmgr.check_context_cwd(map->apps[context], true);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
rc = orte_pls_bproc_launch_app(cellid, jobid, map, num_processes,
vpid_launch, vpid_start, map->app->idx,
rc = orte_pls_bproc_launch_app(cellid, jobid, map, num_processes, num_slots,
vpid_launch, vpid_start, context,
node_array[context], node_array_len[context]);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
free(node_array[context]);
context++;
vpid_launch = vpid_start + mca_pls_bproc_component.num_procs;
}
mca_pls_bproc_component.done_launching = true;
cleanup:
chdir(cwd_save);
while(NULL != (item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(item);
}
if(NULL != node_array) {
free(node_array);
}
if(NULL != node_array_len) {
free(node_array_len);
}
OBJ_DESTRUCT(&mapping);
return rc;
}

View file

@ -81,11 +81,9 @@
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/smr/smr.h"
/* clean up for ORTE 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/pls/pls.h"
#include "orte/mca/pls/base/pls_private.h"
#include "orte/mca/pls/gridengine/pls_gridengine.h"
@ -104,21 +102,10 @@ orte_pls_base_module_t orte_pls_gridengine_module = {
orte_pls_gridengine_finalize
};
/**
* struct used to have enough information to clean up the state of the
* universe if a daemon aborts
*/
struct gridengine_daemon_info_t {
opal_object_t super;
orte_process_name_t *name;
char *nodename;
};
typedef struct gridengine_daemon_info_t gridengine_daemon_info_t;
static OBJ_CLASS_INSTANCE(gridengine_daemon_info_t,
opal_object_t,
NULL, NULL);
static void set_handler_default(int sig);
#if 0
static int update_slot_keyval(orte_ras_node_t* node, int* slot_cnt);
#endif
/**
* Fill the orted_path variable with the directory to the orted
@ -146,7 +133,7 @@ static int orte_pls_gridengine_fill_orted_path(char** orted_path)
*/
static void orte_pls_gridengine_wait_daemon(pid_t pid, int status, void* cbdata)
{
gridengine_daemon_info_t *info = (gridengine_daemon_info_t*) cbdata;
orte_pls_daemon_info_t *info = (orte_pls_daemon_info_t*) cbdata;
int rc;
/* if qrsh exited abnormally, set the daemon's state to aborted
@ -204,16 +191,16 @@ static void orte_pls_gridengine_wait_daemon(pid_t pid, int status, void* cbdata)
*/
int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
{
opal_list_t mapping;
opal_list_item_t* m_item, *n_item;
orte_job_map_t *map;
opal_list_item_t *n_item;
orte_std_cntr_t num_nodes;
orte_vpid_t vpid;
int node_name_index1;
int node_name_index2;
int proc_name_index;
int orted_index;
int call_yield_index;
char *jobid_string;
char *prefix_dir;
char *uri, *param;
char **argv;
int argc;
@ -229,26 +216,19 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
*/
OBJ_CONSTRUCT(&daemons, opal_list_t);
/* Query the list of nodes allocated and mapped to this job.
/* Get the map for this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS
*/
OBJ_CONSTRUCT(&mapping, opal_list_t);
rc = orte_rmaps_base_get_map(jobid, &mapping);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
num_nodes = 0;
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
num_nodes = (orte_std_cntr_t)opal_list_get_size(&map->nodes);
/*
* Allocate a range of vpids for the daemons.
@ -353,10 +333,6 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
free(uri);
free(param);
opal_argv_append(&argc, &argv, "--mpi-call-yield");
call_yield_index = argc;
opal_argv_append(&argc, &argv, "0");
if (mca_pls_gridengine_component.debug) {
param = opal_argv_join(argv, ' ');
if (NULL != param) {
@ -368,332 +344,292 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
/* Figure out the basenames for the libdir and bindir. There is a
lengthy comment about this in pls_rsh_module.c explaining all
the rationale for how / why we're doing this. */
the rationale for how / why we're doing this.
*/
lib_base = opal_basename(OPAL_LIBDIR);
bin_base = opal_basename(OPAL_BINDIR);
/*
* Iterate through each of the contexts
/* See the note about prefix_dir in the orte/mca/pls/slurm/pls_slurm.c
* module. For here, just note that we must have at least one app_context,
* and we take the prefix_dir from that first one.
*/
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
char *prefix_dir = map->app->prefix_dir;
prefix_dir = map->apps[0]->prefix_dir;
/*
* For each of the contexts - iterate through the nodes.
*/
for(n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
orte_ras_node_t* ras_node = rmaps_node->node;
orte_process_name_t* name;
pid_t pid;
char *exec_path, *orted_path;
char **exec_argv;
int remain_slot_cnt;
/*
* Iterate through the nodes.
*/
for(n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_mapped_node_t* rmaps_node = (orte_mapped_node_t*)n_item;
orte_process_name_t* name;
pid_t pid;
char *exec_path, *orted_path;
char **exec_argv;
#if 0
int remain_slot_cnt;
/* RHC - I don't believe this code is really necessary any longer.
* The mapper correctly accounts for slots that have already been
* used. Even if another job starts to run between the time the
* mapper maps this job and we get to this point, the new job
* will have gone through the mapper and will not overuse the node.
* As this code consumes considerable time, I have sliced it out
* of the code for now.
*
* query the registry for the remaining gridengine slot count on
* this node, and update the registry for the count for the
* current process launch */
if (ORTE_SUCCESS != (rc =
update_slot_keyval(ras_node, &remain_slot_cnt))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* already launched on this node */
if(ras_node->node_launched++ != 0) {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: already launched on this node, %s",
ras_node->node_name);
}
continue;
}
/* query the registry for the remaining gridengine slot count on
* this node, and update the registry for the count for the
* current process launch */
if (ORTE_SUCCESS != (rc =
update_slot_keyval(ras_node, &remain_slot_cnt))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* check for the unlikely scenario, because gridengine ras already
* checks for it, but still provide a check there. */
if (remain_slot_cnt < 0) {
opal_show_help("help-pls-gridengine.txt", "insufficient-pe-slot",
true, ras_node->node_name, true);
exit(-1); /* exit instead of return ORTE_ERR_OUT_OF_RESOURCE */
}
/* setup node name */
free(argv[node_name_index1]);
if (NULL != ras_node->node_username &&
0 != strlen (ras_node->node_username)) {
asprintf(&argv[node_name_index1], "%s@%s",
ras_node->node_username, ras_node->node_name);
} else {
argv[node_name_index1] = strdup(ras_node->node_name);
}
free(argv[node_name_index2]);
argv[node_name_index2] = strdup(ras_node->node_name);
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, ras_node->node_cellid, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;
dmn->cell = ras_node->node_cellid;
dmn->nodename = strdup(ras_node->node_name);
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
opal_list_append(&daemons, &dmn->super);
#ifdef __WINDOWS__
printf("Unimplemented feature for windows\n");
return ORTE_ERR_NOT_IMPLEMENTED;
#else
/* fork a child to do qrsh */
pid = fork();
/* check for the unlikely scenario, because gridengine ras already
* checks for it, but still provide a check there. */
if (remain_slot_cnt < 0) {
opal_show_help("help-pls-gridengine.txt", "insufficient-pe-slot",
true, ras_node->node_name, true);
exit(-1); /* exit instead of return ORTE_ERR_OUT_OF_RESOURCE */
}
#endif
if (pid < 0) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
/* setup node name */
free(argv[node_name_index1]);
if (NULL != rmaps_node->username &&
0 != strlen (rmaps_node->username)) {
asprintf(&argv[node_name_index1], "%s@%s",
rmaps_node->username, rmaps_node->nodename);
} else {
argv[node_name_index1] = strdup(rmaps_node->nodename);
}
free(argv[node_name_index2]);
argv[node_name_index2] = strdup(rmaps_node->nodename);
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, rmaps_node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;
dmn->cell = rmaps_node->cell;
dmn->nodename = strdup(rmaps_node->nodename);
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
opal_list_append(&daemons, &dmn->super);
#ifdef __WINDOWS__
printf("Unimplemented feature for windows\n");
return ORTE_ERR_NOT_IMPLEMENTED;
#else
/* fork a child to do qrsh */
pid = fork();
#endif
if (pid < 0) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* child */
if (pid == 0) {
char* name_string;
char** env;
char* var;
long fd, fdmax = sysconf(_SC_OPEN_MAX);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: launching on node %s",
rmaps_node->nodename);
}
/* child */
if (pid == 0) {
char* name_string;
char** env;
char* var;
long fd, fdmax = sysconf(_SC_OPEN_MAX);
/* setting exec_argv and exec_path for qrsh */
exec_argv = &argv[0];
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: launching on node %s",
ras_node->node_name);
sge_root = getenv("SGE_ROOT");
sge_arch = getenv("ARC");
asprintf(&exec_path, "%s/bin/%s/qrsh", sge_root, sge_arch);
exec_path = opal_path_findv(exec_path, X_OK, environ, NULL);
if (NULL == exec_path) {
opal_show_help("help-pls-gridengine.txt", "bad-qrsh-path",
true, exec_path, sge_root, sge_arch);
return ORTE_ERR_NOT_FOUND;
}
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: exec_argv[0]=%s, exec_path=%s",
exec_argv[0], exec_path);
}
/* setting orted_path for orted */
orted_path = opal_path_findv(exec_argv[orted_index], 0, environ, NULL);
if (NULL == orted_path && NULL == prefix_dir) {
rc = orte_pls_gridengine_fill_orted_path(&orted_path);
if (ORTE_SUCCESS != rc) {
return rc;
}
/* set the progress engine schedule for this node.
* if node_slots is set to zero, then we default to
* NOT being oversubscribed
*/
if (ras_node->node_slots > 0 &&
(orte_std_cntr_t)opal_list_get_size(&rmaps_node->node_procs) > ras_node->node_slots) {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
ras_node->node_slots, opal_list_get_size(&rmaps_node->node_procs));
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("1");
} else {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("0");
} else {
if (NULL != prefix_dir) {
orted_path = opal_os_path( false, prefix_dir, bin_base, "orted", NULL );
}
/* setting exec_argv and exec_path for qrsh */
exec_argv = &argv[0];
sge_root = getenv("SGE_ROOT");
sge_arch = getenv("ARC");
asprintf(&exec_path, "%s/bin/%s/qrsh", sge_root, sge_arch);
exec_path = opal_path_findv(exec_path, X_OK, environ, NULL);
if (NULL == exec_path) {
opal_show_help("help-pls-gridengine.txt", "bad-qrsh-path",
true, exec_path, sge_root, sge_arch);
return ORTE_ERR_NOT_FOUND;
}
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: exec_argv[0]=%s, exec_path=%s",
exec_argv[0], exec_path);
}
/* setting orted_path for orted */
orted_path = opal_path_findv(exec_argv[orted_index], 0, environ, NULL);
if (NULL == orted_path && NULL == prefix_dir) {
/* If we yet did not fill up the orted_path, do so now */
if (NULL == orted_path) {
rc = orte_pls_gridengine_fill_orted_path(&orted_path);
if (ORTE_SUCCESS != rc) {
return rc;
}
} else {
if (NULL != prefix_dir) {
orted_path = opal_os_path( false, prefix_dir, bin_base, "orted", NULL );
}
/* If we yet did not fill up the orted_path, do so now */
if (NULL == orted_path) {
rc = orte_pls_gridengine_fill_orted_path(&orted_path);
if (ORTE_SUCCESS != rc) {
return rc;
}
}
}
asprintf(&argv[orted_index], orted_path);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: orted_path=%s", orted_path);
}
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. We're
already in the child process, so it's ok to modify
environ. */
if (NULL != prefix_dir) {
char *oldenv, *newenv;
/* Reset PATH */
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
oldenv = getenv("PATH");
if (NULL != oldenv) {
char *temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free( newenv );
newenv = temp;
}
opal_setenv("PATH", newenv, true, &environ);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: reset PATH: %s", newenv);
}
free(newenv);
/* Reset LD_LIBRARY_PATH */
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
oldenv = getenv("LD_LIBRARY_PATH");
if (NULL != oldenv) {
char* temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free(newenv);
newenv = temp;
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: reset LD_LIBRARY_PATH: %s",
newenv);
}
free(newenv);
}
var = getenv("HOME");
if (NULL != var) {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: changing to directory %s",
var);
}
/* Ignore errors -- what are we going to do?
(and we ignore errors on the remote nodes
in the fork pls, so this is consistent) */
chdir(var);
}
/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:gridengine: unable to create process name");
exit(-1);
}
free(argv[proc_name_index]);
argv[proc_name_index] = strdup(name_string);
if (!mca_pls_gridengine_component.debug) {
/* setup stdin */
int fd = open("/dev/null", O_RDWR, 0);
dup2(fd, 0);
close(fd);
}
/* close all file descriptors w/ exception of stdin/stdout/stderr */
for(fd=3; fd<fdmax; fd++)
close(fd);
/* Set signal handlers back to the default. Do this close
to the execve() because the event library may (and likely
will) reset them. If we don't do this, the event
library may have left some set that, at least on some
OS's, don't get reset via fork() or exec(). Hence, the
orted could be unkillable (for example). */
set_handler_default(SIGTERM);
set_handler_default(SIGINT);
#ifndef __WINDOWS__
set_handler_default(SIGHUP);
set_handler_default(SIGPIPE);
#endif
set_handler_default(SIGCHLD);
/* Unblock all signals, for many of the same reasons that
we set the default handlers, above. This is noticable
on Linux where the event library blocks SIGTERM, but we
don't want that blocked by the orted (or, more
specifically, we don't want it to be blocked by the
orted and then inherited by the ORTE processes that it
forks, making them unkillable by SIGTERM). */
#ifndef __WINDOWS__
sigprocmask(0, 0, &sigs);
sigprocmask(SIG_UNBLOCK, &sigs, 0);
#endif
/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);
/* exec the daemon */
if (mca_pls_gridengine_component.debug) {
param = opal_argv_join(exec_argv, ' ');
if (NULL != param) {
opal_output(0, "pls:gridengine: executing: %s", param);
free(param);
}
}
execve(exec_path, exec_argv, env);
opal_output(0, "pls:gridengine: execve failed with errno=%d\n", errno);
exit(-1);
} else { /* parent */
gridengine_daemon_info_t *daemon_info;
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: parent");
}
/* setup callback on sigchild - wait until setup above is complete
* as the callback can occur in the call to orte_wait_cb
*/
daemon_info = OBJ_NEW(gridengine_daemon_info_t);
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(daemon_info->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
}
daemon_info->nodename= strdup(ras_node->node_name);
orte_wait_cb(pid, orte_pls_gridengine_wait_daemon, daemon_info);
vpid++;
}
free(name);
asprintf(&argv[orted_index], orted_path);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: orted_path=%s", orted_path);
}
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. We're
already in the child process, so it's ok to modify
environ. */
if (NULL != prefix_dir) {
char *oldenv, *newenv;
/* Reset PATH */
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
oldenv = getenv("PATH");
if (NULL != oldenv) {
char *temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free( newenv );
newenv = temp;
}
opal_setenv("PATH", newenv, true, &environ);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: reset PATH: %s", newenv);
}
free(newenv);
/* Reset LD_LIBRARY_PATH */
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
oldenv = getenv("LD_LIBRARY_PATH");
if (NULL != oldenv) {
char* temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free(newenv);
newenv = temp;
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: reset LD_LIBRARY_PATH: %s",
newenv);
}
free(newenv);
}
var = getenv("HOME");
if (NULL != var) {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: changing to directory %s",
var);
}
/* Ignore errors -- what are we going to do?
(and we ignore errors on the remote nodes
in the fork pls, so this is consistent) */
chdir(var);
}
/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:gridengine: unable to create process name");
exit(-1);
}
free(argv[proc_name_index]);
argv[proc_name_index] = strdup(name_string);
if (!mca_pls_gridengine_component.debug) {
/* setup stdin */
int fd = open("/dev/null", O_RDWR, 0);
dup2(fd, 0);
close(fd);
}
/* close all file descriptors w/ exception of stdin/stdout/stderr */
for(fd=3; fd<fdmax; fd++)
close(fd);
/* Set signal handlers back to the default. Do this close
to the execve() because the event library may (and likely
will) reset them. If we don't do this, the event
library may have left some set that, at least on some
OS's, don't get reset via fork() or exec(). Hence, the
orted could be unkillable (for example). */
set_handler_default(SIGTERM);
set_handler_default(SIGINT);
#ifndef __WINDOWS__
set_handler_default(SIGHUP);
set_handler_default(SIGPIPE);
#endif
set_handler_default(SIGCHLD);
/* Unblock all signals, for many of the same reasons that
we set the default handlers, above. This is noticable
on Linux where the event library blocks SIGTERM, but we
don't want that blocked by the orted (or, more
specifically, we don't want it to be blocked by the
orted and then inherited by the ORTE processes that it
forks, making them unkillable by SIGTERM). */
#ifndef __WINDOWS__
sigprocmask(0, 0, &sigs);
sigprocmask(SIG_UNBLOCK, &sigs, 0);
#endif
/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);
/* exec the daemon */
if (mca_pls_gridengine_component.debug) {
param = opal_argv_join(exec_argv, ' ');
if (NULL != param) {
opal_output(0, "pls:gridengine: executing: %s", param);
free(param);
}
}
execve(exec_path, exec_argv, env);
opal_output(0, "pls:gridengine: execve failed with errno=%d\n", errno);
exit(-1);
} else { /* parent */
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: parent");
}
/* setup callback on sigchild - wait until setup above is complete
* as the callback can occur in the call to orte_wait_cb
*/
orte_wait_cb(pid, orte_pls_gridengine_wait_daemon, dmn);
vpid++;
}
free(name);
}
/* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
cleanup:
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&daemons);
if (NULL != lib_base) {
free(lib_base);
}
@ -707,6 +643,7 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
return rc;
}
#if 0
/**
* Query the registry for the gridengine slot count, and update it
*/
@ -808,6 +745,7 @@ static int update_slot_keyval(orte_ras_node_t* ras_node, int* slot_cnt)
return rc;
}
#endif
/**
* Query the registry for all nodes participating in the job

View file

@ -38,6 +38,7 @@
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/smr/smr.h"
@ -46,10 +47,7 @@
#include "orte/runtime/orte_wait.h"
/* remove for ORTE 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/rmgr/base/rmgr_private.h"
#include "orte/mca/pls/pls.h"
#include "orte/mca/pls/poe/pls_poe.h"
@ -338,33 +336,34 @@ poe_wait_job - call back when POE finish
*/
static void poe_wait_job(pid_t pid, int status, void* cbdata)
{
opal_list_t map;
opal_list_item_t* item;
orte_job_map_t *map;
opal_list_item_t *item, *item2;
int rc;
/* query allocation for the job */
OBJ_CONSTRUCT(&map, opal_list_t);
rc = orte_rmaps_base_get_map(mca_pls_poe_component.jobid,&map);
rc = orte_rmaps.get_job_map(&map, mca_pls_poe_component.jobid);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
for(item = opal_list_get_first(&map);
item != opal_list_get_end(&map);
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item;
orte_std_cntr_t i;
orte_mapped_node_t* node = (orte_mapped_node_t*) item;
for(i = 0 ; i < map->num_procs ; ++i) {
orte_session_dir_finalize(&(map->procs[i])->proc_name);
rc = orte_smr.set_proc_state(&(map->procs[i]->proc_name),
for (item2 = opal_list_get_first(&node->procs);
item2 != opal_list_get_end(&node->procs);
item2 = opal_list_get_next(item2)) {
orte_mapped_proc_t* proc = (orte_mapped_proc_t*)item2;
orte_session_dir_finalize(&(proc->name));
rc = orte_smr.set_proc_state(&(proc->name),
ORTE_PROC_STATE_ABORTED, status);
}
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
}
}
OBJ_DESTRUCT(&map);
}
/**
@ -379,7 +378,7 @@ poe_create_cmd_file - create POE command file
static int poe_create_cmd_file(
FILE *cfp,
orte_app_context_t* context,
orte_rmaps_base_proc_t* proc,
orte_mapped_proc_t* proc,
orte_vpid_t vpid_start,
orte_vpid_t vpid_range)
{
@ -428,7 +427,7 @@ static int poe_create_cmd_file(
free(uri);
/* push name into environment */
orte_ns_nds_env_put(&proc->proc_name, vpid_start, vpid_range, &environ_copy);
orte_ns_nds_env_put(&proc->name, vpid_start, vpid_range, &environ_copy);
if (context->argv == NULL) {
context->argv = malloc(sizeof(char*)*2);
@ -461,8 +460,8 @@ poe_launch_interactive - launch an interactive job
*/
static inline int poe_launch_interactive_job(orte_jobid_t jobid)
{
opal_list_t map, nodes, mapping_list;
opal_list_item_t* item;
orte_job_map_t *map;
opal_list_item_t *item, *item2;
orte_vpid_t vpid_start, vpid_range;
orte_std_cntr_t num_nodes, num_procs;
FILE *hfp, *cfp;
@ -479,50 +478,46 @@ static inline int poe_launch_interactive_job(orte_jobid_t jobid)
mca_pls_poe_component.jobid = jobid;
OBJ_CONSTRUCT(&nodes, opal_list_t);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
/* get the map for this job */
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
num_nodes = opal_list_get_size(&nodes);
num_nodes = opal_list_get_size(&map->nodes);
if(!strncmp(mca_pls_poe_component.resource_allocation,"hostfile",8)) {
/* Create a temporary hostlist file if user specify */
/* Create a temporary hostlist file if user specify */
if( (NULL==(mca_pls_poe_component.hostfile=tempnam(NULL,NULL))) ||
(NULL==(hfp=fopen(mca_pls_poe_component.hostfile,"w"))) ) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
for(item = opal_list_get_first(&nodes);
item != opal_list_get_end(&nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
fprintf(hfp,"%s\n",node->node_name);
}
fclose(hfp);
if( (NULL==(mca_pls_poe_component.hostfile=tempnam(NULL,NULL))) ||
(NULL==(hfp=fopen(mca_pls_poe_component.hostfile,"w"))) ) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
rc = orte_rmgr_base_get_job_slots(jobid, &num_procs);
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
OBJ_CONSTRUCT(&map, opal_list_t);
rc = orte_rmaps_base_get_map(jobid,&map);
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
fprintf(hfp,"%s\n",node->nodename);
}
fclose(hfp);
rc = orte_rmgr.get_vpid_range(jobid, &vpid_start, &vpid_range);
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
/* Create a temporary POE command file */
for(item = opal_list_get_first(&map);
item != opal_list_get_end(&map);
num_procs = 0;
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map2 = (orte_rmaps_base_map_t*)item;
orte_std_cntr_t i;
for(i=0; i<map2->num_procs; i++) {
rc = poe_create_cmd_file(cfp, map2->app, map2->procs[i], vpid_start, vpid_range);
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
for (item2 = opal_list_get_first(&node->procs);
item2 != opal_list_get_end(&node->procs);
item2 = opal_list_get_next(item2)) {
orte_mapped_proc_t* proc = (orte_mapped_proc_t*)item2;
rc = poe_create_cmd_file(cfp, map->apps[proc->app_idx], proc, vpid_start, vpid_range);
if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
num_procs++;
}
}
fclose(cfp);
@ -587,20 +582,6 @@ static inline int poe_launch_interactive_job(orte_jobid_t jobid)
cleanup:
while(NULL != (item = opal_list_remove_first(&map))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&map);
while(NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
while(NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);
return rc;
}

View file

@ -81,7 +81,7 @@
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/smr/smr.h"
#include "orte/mca/pls/pls.h"
@ -109,17 +109,6 @@ orte_pls_base_module_t orte_pls_rsh_module = {
orte_pls_rsh_finalize
};
/* struct used to have enough information to clean up the state of the
universe if a daemon aborts */
struct rsh_daemon_info_t {
opal_object_t super;
orte_ras_node_t* node;
orte_jobid_t jobid;
};
typedef struct rsh_daemon_info_t rsh_daemon_info_t;
static OBJ_CLASS_INSTANCE(rsh_daemon_info_t,
opal_object_t,
NULL, NULL);
static void set_handler_default(int sig);
enum {
@ -140,11 +129,15 @@ static const char * orte_pls_rsh_shell_name[] = {
"unknown"
};
/* local global storage of the list of active daemons */
opal_list_t active_daemons;
/**
* Check the Shell variable on the specified node
*/
static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell)
static int orte_pls_rsh_probe(orte_mapped_node_t * node, orte_pls_rsh_shell * shell)
{
char ** argv;
int argc, rc, nfds, i;
@ -156,7 +149,7 @@ static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: going to check SHELL variable on node %s\n",
node->node_name);
node->nodename);
}
*shell = ORTE_PLS_RSH_SHELL_UNKNOWN;
/*
@ -164,7 +157,7 @@ static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell
*/
argv = opal_argv_copy(mca_pls_rsh_component.agent_argv);
argc = mca_pls_rsh_component.agent_argc;
opal_argv_append(&argc, &argv, node->node_name);
opal_argv_append(&argc, &argv, node->nodename);
opal_argv_append(&argc, &argv, "echo $SHELL");
if (pipe(fd)) {
opal_output(0, "pls:rsh: pipe failed with errno=%d\n", errno);
@ -251,7 +244,7 @@ static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell
}
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: node:%s has SHELL:%s\n",
node->node_name, orte_pls_rsh_shell_name[*shell]);
node->nodename, orte_pls_rsh_shell_name[*shell]);
}
return rc;
}
@ -283,9 +276,10 @@ static int orte_pls_rsh_fill_exec_path ( char ** exec_path)
static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
{
rsh_daemon_info_t *info = (rsh_daemon_info_t*) cbdata;
opal_list_t map;
opal_list_item_t* item;
orte_pls_daemon_info_t *info = (orte_pls_daemon_info_t*) cbdata;
orte_mapped_node_t *node;
orte_mapped_proc_t *proc;
opal_list_item_t *item;
int rc;
/* if ssh exited abnormally, set the child processes to aborted
@ -298,11 +292,8 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
*/
if (! WIFEXITED(status) || ! WEXITSTATUS(status) == 0) {
/* get the mapping for our node so we can cancel the right things */
OBJ_CONSTRUCT(&map, opal_list_t);
rc = orte_rmaps_base_get_node_map(orte_process_info.my_name->cellid,
info->jobid,
info->node->node_name,
&map);
rc = orte_rmaps.get_node_map(&node, info->cell,
info->nodename, info->active_job);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
@ -310,33 +301,30 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
/* set state of all processes associated with the daemon as
terminated */
for(item = opal_list_get_first(&map);
item != opal_list_get_end(&map);
for(item = opal_list_get_first(&node->procs);
item != opal_list_get_end(&node->procs);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item;
orte_std_cntr_t i;
proc = (orte_mapped_proc_t*) item;
for (i = 0 ; i < map->num_procs ; ++i) {
/* Clean up the session directory as if we were the
process itself. This covers the case where the
process died abnormally and didn't cleanup its own
session directory. */
orte_session_dir_finalize(&(map->procs[i])->proc_name);
orte_session_dir_finalize(&(proc->name));
rc = orte_smr.set_proc_state(&(map->procs[i]->proc_name),
rc = orte_smr.set_proc_state(&(proc->name),
ORTE_PROC_STATE_ABORTED, status);
}
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
}
OBJ_DESTRUCT(&map);
OBJ_RELEASE(node);
cleanup:
/* tell the user something went wrong */
opal_output(0, "ERROR: A daemon on node %s failed to start as expected.",
info->node->node_name);
info->nodename);
opal_output(0, "ERROR: There may be more information available from");
opal_output(0, "ERROR: the remote shell (see above).");
@ -361,6 +349,15 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
/* release any waiting threads */
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
/* tell the system that this daemon is gone */
if (ORTE_SUCCESS != (rc = orte_pls_base_remove_daemon(info))) {
ORTE_ERROR_LOG(rc);
}
/* remove the daemon from our local list */
opal_list_remove_item(&active_daemons, &info->super);
OBJ_RELEASE(info);
if (mca_pls_rsh_component.num_children-- >=
mca_pls_rsh_component.num_concurrent ||
mca_pls_rsh_component.num_children == 0) {
@ -368,9 +365,6 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
}
OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
/* cleanup */
OBJ_RELEASE(info->node);
OBJ_RELEASE(info);
}
/**
@ -380,18 +374,19 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
int orte_pls_rsh_launch(orte_jobid_t jobid)
{
opal_list_t mapping;
opal_list_item_t* m_item, *n_item;
orte_job_map_t *map;
opal_list_item_t *n_item;
orte_mapped_node_t *rmaps_node;
orte_std_cntr_t num_nodes;
orte_vpid_t vpid;
int node_name_index1;
int node_name_index2;
int proc_name_index;
int local_exec_index, local_exec_index_end;
int call_yield_index;
char *jobid_string;
char *uri, *param;
char **argv, **tmp;
char *prefix_dir;
int argc;
int rc;
sigset_t sigs;
@ -399,34 +394,46 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
bool remote_bash = false, remote_csh = false;
bool local_bash = false, local_csh = false;
char *lib_base = NULL, *bin_base = NULL;
opal_list_t daemons;
orte_pls_daemon_info_t *dmn;
/* setup a list that will contain the info for all the daemons
* so we can store it on the registry when done
* so we can store it on the registry when done and use it
* locally to track their state
*/
OBJ_CONSTRUCT(&daemons, opal_list_t);
OBJ_CONSTRUCT(&active_daemons, opal_list_t);
/* Query the list of nodes allocated and mapped to this job.
/* Get the map for this job
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
* - need to know the nodes we are launching on
* All other mapping responsibilities fall to orted in the fork PLS
*/
OBJ_CONSTRUCT(&mapping, opal_list_t);
rc = orte_rmaps_base_get_map(jobid, &mapping);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
num_nodes = 0;
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
num_nodes = (orte_std_cntr_t)opal_list_get_size(&map->nodes);
/*
* After a discussion between Ralph & Jeff, we concluded that we
* really are handling the prefix dir option incorrectly. It currently
* is associated with an app_context, yet it really refers to the
* location where OpenRTE/Open MPI is installed on a NODE. Fixing
* this right now would involve significant change to orterun as well
* as elsewhere, so we will intentionally leave this incorrect at this
* point. The error, however, is identical to that seen in all prior
* releases of OpenRTE/Open MPI, so our behavior is no worse than before.
*
* A note to fix this, along with ideas on how to do so, has been filed
* on the project's Trac system under "feature enhancement".
*
* For now, default to the prefix_dir provided in the first app_context.
* Since there always MUST be at least one app_context, we are safe in
* doing this.
*/
prefix_dir = map->apps[0]->prefix_dir;
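    /* Editor's sketch -- not part of this commit.  If one wanted to at
     * least surface conflicting --prefix settings across app_contexts
     * instead of silently taking the first, something along these lines
     * could follow the assignment above (names as already used in this
     * file):
     *
     *   orte_std_cntr_t i;
     *   for (i = 1; i < map->num_apps; i++) {
     *       if (NULL != prefix_dir && NULL != map->apps[i]->prefix_dir &&
     *           0 != strcmp(prefix_dir, map->apps[i]->prefix_dir)) {
     *           opal_output(0, "pls:rsh: ignoring prefix_dir %s from app_context %ld; using %s",
     *                       map->apps[i]->prefix_dir, (long)i, prefix_dir);
     *       }
     *   }
     *
     * The SLURM launcher further down in this commit performs a comparable
     * check and aborts with its "multiple-prefixes" help message instead. */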
/*
* Allocate a range of vpids for the daemons.
*/
@ -475,12 +482,8 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
}
} else {
orte_pls_rsh_shell shell;
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)opal_list_get_first(&mapping);
orte_rmaps_base_node_t* rmaps_node =
(orte_rmaps_base_node_t*)opal_list_get_first(&map->nodes);
orte_ras_node_t* node = rmaps_node->node;
rc = orte_pls_rsh_probe(node, &shell);
rmaps_node = (orte_mapped_node_t*)opal_list_get_first(&map->nodes);
rc = orte_pls_rsh_probe(rmaps_node, &shell);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
@ -582,10 +585,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
free(uri);
free(param);
opal_argv_append(&argc, &argv, "--mpi-call-yield");
call_yield_index = argc;
opal_argv_append(&argc, &argv, "0");
local_exec_index_end = argc;
if (!(remote_csh || remote_bash)) {
opal_argv_append(&argc, &argv, ")");
@ -633,378 +632,337 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
bin_base = opal_basename(OPAL_BINDIR);
/*
* Iterate through each of the contexts
* Iterate through each of the nodes
*/
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
char * prefix_dir = map->app->prefix_dir;
for(n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_process_name_t* name;
pid_t pid;
char *exec_path;
char **exec_argv;
/*
* For each of the contexts - iterate through the nodes.
*/
for(n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
orte_ras_node_t* ras_node = rmaps_node->node;
orte_process_name_t* name;
pid_t pid;
char *exec_path;
char **exec_argv;
rmaps_node = (orte_mapped_node_t*)n_item;
/* already launched on this node */
if(ras_node->node_launched++ != 0)
continue;
/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;
opal_list_append(&active_daemons, &dmn->super);
/* setup node name */
free(argv[node_name_index1]);
if (NULL != rmaps_node->username &&
0 != strlen (rmaps_node->username)) {
asprintf (&argv[node_name_index1], "%s@%s",
rmaps_node->username, rmaps_node->nodename);
} else {
argv[node_name_index1] = strdup(rmaps_node->nodename);
}
/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
opal_list_append(&daemons, &dmn->super);
/* setup node name */
free(argv[node_name_index1]);
if (NULL != ras_node->node_username &&
0 != strlen (ras_node->node_username)) {
asprintf (&argv[node_name_index1], "%s@%s",
ras_node->node_username, ras_node->node_name);
} else {
argv[node_name_index1] = strdup(ras_node->node_name);
free(argv[node_name_index2]);
argv[node_name_index2] = strdup(rmaps_node->nodename);
/* save it in the daemon info */
dmn->nodename = strdup(rmaps_node->nodename);
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, rmaps_node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* save it in the daemon info */
dmn->cell = rmaps_node->cell;
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* rsh a child to exec the rsh/ssh session */
/* set the process state to "launched" */
if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(name, ORTE_PROC_STATE_LAUNCHED, 0))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
pid = fork();
if (pid < 0) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* child */
if (pid == 0) {
char* name_string;
char** env;
char* var;
long fd, fdmax = sysconf(_SC_OPEN_MAX);
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: launching on node %s\n",
rmaps_node->nodename);
}
free(argv[node_name_index2]);
argv[node_name_index2] = strdup(ras_node->node_name);
/* save it in the daemon info */
dmn->nodename = strdup(ras_node->node_name);
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, ras_node->node_cellid, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* save it in the daemon info */
dmn->cell = ras_node->node_cellid;
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* rsh a child to exec the rsh/ssh session */
/* set the process state to "launched" */
if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(name, ORTE_PROC_STATE_LAUNCHED, 0))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
pid = fork();
if (pid < 0) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* child */
if (pid == 0) {
char* name_string;
char** env;
char* var;
long fd, fdmax = sysconf(_SC_OPEN_MAX);
/* We don't need to sense an oversubscribed condition and set the sched_yield
* for the node as we are only launching the daemons at this time. The daemons
* are now smart enough to set the oversubscribed condition themselves when
* they launch the local procs.
*/
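    /* Editor's illustration -- not part of this commit.  The daemon-side
     * check alluded to above would look roughly like the yield_when_idle
     * handling that this commit removes from the launcher, e.g.:
     *
     *   if (node_slots > 0 && num_local_procs > node_slots) {
     *       var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
     *       opal_setenv(var, "1", true, &env);
     *   }
     *
     * where node_slots and num_local_procs stand for whatever the orted
     * knows about its own node (hypothetical names here). */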
/* Is this a local launch?
*
* Not all node names may be resolvable (if we found
* localhost in the hostfile, for example). So first
* check trivial case of node_name being same as the
* current nodename, which must be local. If that doesn't
* match, check using ifislocal().
*/
if (0 == strcmp(rmaps_node->nodename, orte_system_info.nodename) ||
opal_ifislocal(rmaps_node->nodename)) {
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: launching on node %s\n",
ras_node->node_name);
opal_output(0, "pls:rsh: %s is a LOCAL node\n",
rmaps_node->nodename);
}
exec_argv = &argv[local_exec_index];
exec_path = opal_path_findv(exec_argv[0], 0, environ, NULL);
/* set the progress engine schedule for this node.
* if node_slots is set to zero, then we default to
* NOT being oversubscribed
*/
if (ras_node->node_slots > 0 &&
(orte_std_cntr_t)opal_list_get_size(&rmaps_node->node_procs) > ras_node->node_slots) {
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
ras_node->node_slots, opal_list_get_size(&rmaps_node->node_procs));
if (NULL == exec_path && NULL == prefix_dir) {
rc = orte_pls_rsh_fill_exec_path (&exec_path);
if (ORTE_SUCCESS != rc) {
return rc;
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("1");
} else {
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: not oversubscribed -- setting mpi_yield_when_idle to 0");
if (NULL != prefix_dir) {
exec_path = opal_os_path( false, prefix_dir, bin_base, "orted", NULL );
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("0");
}
/* Is this a local launch?
*
* Not all node names may be resolvable (if we found
* localhost in the hostfile, for example). So first
* check trivial case of node_name being same as the
* current nodename, which must be local. If that doesn't
* match, check using ifislocal().
*/
if (0 == strcmp(ras_node->node_name, orte_system_info.nodename) ||
opal_ifislocal(ras_node->node_name)) {
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: %s is a LOCAL node\n",
ras_node->node_name);
}
exec_argv = &argv[local_exec_index];
exec_path = opal_path_findv(exec_argv[0], 0, environ, NULL);
if (NULL == exec_path && NULL == prefix_dir) {
/* If we have not yet filled in the exec path, do so now */
if (NULL == exec_path) {
rc = orte_pls_rsh_fill_exec_path (&exec_path);
if (ORTE_SUCCESS != rc) {
return rc;
}
} else {
if (NULL != prefix_dir) {
exec_path = opal_os_path( false, prefix_dir, bin_base, "orted", NULL );
}
/* If we have not yet filled in the exec path, do so now */
if (NULL == exec_path) {
rc = orte_pls_rsh_fill_exec_path (&exec_path);
if (ORTE_SUCCESS != rc) {
return rc;
}
}
}
}
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. We're
already in the child process, so it's ok to modify
environ. */
if (NULL != prefix_dir) {
char *oldenv, *newenv;
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. We're
already in the child process, so it's ok to modify
environ. */
if (NULL != prefix_dir) {
char *oldenv, *newenv;
/* Reset PATH */
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
oldenv = getenv("PATH");
if (NULL != oldenv) {
char *temp;
asprintf(&temp, "%s:%s", newenv, oldenv );
free( newenv );
newenv = temp;
}
opal_setenv("PATH", newenv, true, &environ);
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: reset PATH: %s", newenv);
}
free(newenv);
/* Reset LD_LIBRARY_PATH */
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
oldenv = getenv("LD_LIBRARY_PATH");
if (NULL != oldenv) {
char* temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free(newenv);
newenv = temp;
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: reset LD_LIBRARY_PATH: %s",
newenv);
}
free(newenv);
/* Reset PATH */
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
oldenv = getenv("PATH");
if (NULL != oldenv) {
char *temp;
asprintf(&temp, "%s:%s", newenv, oldenv );
free( newenv );
newenv = temp;
}
/* Since this is a local execution, we need to
potentially whack the final ")" in the argv (if
sh/csh conditionals, from above). Note that we're
modifying the argv[] in the child process, so
there's no need to save this and restore it
afterward -- the parent's argv[] is unmodified. */
if (NULL != argv[local_exec_index_end]) {
free(argv[local_exec_index_end]);
argv[local_exec_index_end] = NULL;
}
/* Finally, chdir($HOME) because we're making the
assumption that this is what will happen on
remote nodes (via rsh/ssh). This allows a user
to specify a path that is relative to $HOME for
both the cwd and argv[0] and it will work on
all nodes -- including the local host.
Otherwise, it would work on remote nodes and
not the local node. If the user does not start
in $HOME on the remote nodes... well... let's
hope they start in $HOME. :-) */
var = getenv("HOME");
if (NULL != var) {
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: changing to directory %s",
var);
}
/* Ignore errors -- what are we going to do?
(and we ignore errors on the remote nodes
in the fork pls, so this is consistent) */
chdir(var);
}
} else {
opal_setenv("PATH", newenv, true, &environ);
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: %s is a REMOTE node\n",
ras_node->node_name);
opal_output(0, "pls:rsh: reset PATH: %s", newenv);
}
exec_argv = argv;
exec_path = strdup(mca_pls_rsh_component.agent_path);
free(newenv);
if (NULL != prefix_dir) {
if (remote_bash) {
asprintf (&argv[local_exec_index],
"PATH=%s/%s:$PATH ; export PATH ; "
"LD_LIBRARY_PATH=%s/%s:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; "
"%s/%s/%s",
prefix_dir, bin_base,
prefix_dir, lib_base,
prefix_dir, bin_base,
mca_pls_rsh_component.orted);
}
if (remote_csh) {
/* [t]csh is a bit more challenging -- we
have to check whether LD_LIBRARY_PATH
is already set before we try to set it.
Must be very careful about obeying
[t]csh's order of evaluation and not
using a variable before it is defined.
See this thread for more details:
http://www.open-mpi.org/community/lists/users/2006/01/0517.php. */
asprintf (&argv[local_exec_index],
"set path = ( %s/%s $path ) ; "
"if ( $?LD_LIBRARY_PATH == 1 ) "
"set OMPI_have_llp ; "
"if ( $?LD_LIBRARY_PATH == 0 ) "
"setenv LD_LIBRARY_PATH %s/%s ; "
"if ( $?OMPI_have_llp == 1 ) "
"setenv LD_LIBRARY_PATH %s/%s:$LD_LIBRARY_PATH ; "
"%s/%s/%s",
prefix_dir, bin_base,
prefix_dir, lib_base,
prefix_dir, lib_base,
prefix_dir, bin_base,
mca_pls_rsh_component.orted);
}
/* Reset LD_LIBRARY_PATH */
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
oldenv = getenv("LD_LIBRARY_PATH");
if (NULL != oldenv) {
char* temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free(newenv);
newenv = temp;
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: reset LD_LIBRARY_PATH: %s",
newenv);
}
free(newenv);
}
/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(0, "orte_pls_rsh: unable to create process name");
exit(-1);
}
free(argv[proc_name_index]);
argv[proc_name_index] = strdup(name_string);
if (!mca_pls_rsh_component.debug) {
/* setup stdin */
int fd = open("/dev/null", O_RDWR);
dup2(fd, 0);
close(fd);
/* Since this is a local execution, we need to
potentially whack the final ")" in the argv (if
sh/csh conditionals, from above). Note that we're
modifying the argv[] in the child process, so
there's no need to save this and restore it
afterward -- the parent's argv[] is unmodified. */
if (NULL != argv[local_exec_index_end]) {
free(argv[local_exec_index_end]);
argv[local_exec_index_end] = NULL;
}
/* close all file descriptors w/ exception of stdin/stdout/stderr */
for(fd=3; fd<fdmax; fd++)
close(fd);
/* Set signal handlers back to the default. Do this close
to the execve() because the event library may (and likely
will) reset them. If we don't do this, the event
library may have left some set that, at least on some
OS's, don't get reset via fork() or exec(). Hence, the
orted could be unkillable (for example). */
set_handler_default(SIGTERM);
set_handler_default(SIGINT);
set_handler_default(SIGHUP);
set_handler_default(SIGPIPE);
set_handler_default(SIGCHLD);
/* Unblock all signals, for many of the same reasons that
we set the default handlers, above. This is noticeable
on Linux where the event library blocks SIGTERM, but we
don't want that blocked by the orted (or, more
specifically, we don't want it to be blocked by the
orted and then inherited by the ORTE processes that it
forks, making them unkillable by SIGTERM). */
sigprocmask(0, 0, &sigs);
sigprocmask(SIG_UNBLOCK, &sigs, 0);
/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);
/* exec the daemon */
/* Finally, chdir($HOME) because we're making the
assumption that this is what will happen on
remote nodes (via rsh/ssh). This allows a user
to specify a path that is relative to $HOME for
both the cwd and argv[0] and it will work on
all nodes -- including the local host.
Otherwise, it would work on remote nodes and
not the local node. If the user does not start
in $HOME on the remote nodes... well... let's
hope they start in $HOME. :-) */
var = getenv("HOME");
if (NULL != var) {
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: changing to directory %s",
var);
}
/* Ignore errors -- what are we going to do?
(and we ignore errors on the remote nodes
in the fork pls, so this is consistent) */
chdir(var);
}
} else {
if (mca_pls_rsh_component.debug) {
param = opal_argv_join(exec_argv, ' ');
if (NULL != param) {
opal_output(0, "pls:rsh: executing: %s", param);
free(param);
opal_output(0, "pls:rsh: %s is a REMOTE node\n",
rmaps_node->nodename);
}
exec_argv = argv;
exec_path = strdup(mca_pls_rsh_component.agent_path);
if (NULL != prefix_dir) {
if (remote_bash) {
asprintf (&argv[local_exec_index],
"PATH=%s/%s:$PATH ; export PATH ; "
"LD_LIBRARY_PATH=%s/%s:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; "
"%s/%s/%s",
prefix_dir, bin_base,
prefix_dir, lib_base,
prefix_dir, bin_base,
mca_pls_rsh_component.orted);
}
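    /* Editor's illustration -- not part of this commit.  With a
     * hypothetical --prefix of /opt/openmpi, and bin_base/lib_base being
     * the basenames of OPAL_BINDIR/OPAL_LIBDIR (typically "bin" and
     * "lib"), the bash form above expands to roughly:
     *
     *   PATH=/opt/openmpi/bin:$PATH ; export PATH ;
     *   LD_LIBRARY_PATH=/opt/openmpi/lib:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ;
     *   /opt/openmpi/bin/orted
     *
     * with the remaining orted arguments supplied by the rest of argv. */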
if (remote_csh) {
/* [t]csh is a bit more challenging -- we
have to check whether LD_LIBRARY_PATH
is already set before we try to set it.
Must be very careful about obeying
[t]csh's order of evaluation and not
using a variable before it is defined.
See this thread for more details:
http://www.open-mpi.org/community/lists/users/2006/01/0517.php. */
asprintf (&argv[local_exec_index],
"set path = ( %s/%s $path ) ; "
"if ( $?LD_LIBRARY_PATH == 1 ) "
"set OMPI_have_llp ; "
"if ( $?LD_LIBRARY_PATH == 0 ) "
"setenv LD_LIBRARY_PATH %s/%s ; "
"if ( $?OMPI_have_llp == 1 ) "
"setenv LD_LIBRARY_PATH %s/%s:$LD_LIBRARY_PATH ; "
"%s/%s/%s",
prefix_dir, bin_base,
prefix_dir, lib_base,
prefix_dir, lib_base,
prefix_dir, bin_base,
mca_pls_rsh_component.orted);
}
}
execve(exec_path, exec_argv, env);
opal_output(0, "pls:rsh: execv failed with errno=%d\n", errno);
exit(-1);
} else { /* father */
rsh_daemon_info_t *daemon_info;
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
/* JJH Bug:
* If we are in '--debug-daemons' we keep the ssh connection
* alive for the span of the run. If we use this option
* AND we launch on more than "num_concurrent" machines
* then we will deadlock. No connections are terminated
* until the job is complete, no job is started
* since all the orteds are waiting for all the others
to come online, and the others are not launched because
* we are waiting on those that have started to terminate
* their ssh tunnels. :(
*/
if (mca_pls_rsh_component.num_children++ >=
mca_pls_rsh_component.num_concurrent) {
opal_condition_wait(&mca_pls_rsh_component.cond, &mca_pls_rsh_component.lock);
}
OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
/* setup callback on sigchild - wait until setup above is complete
* as the callback can occur in the call to orte_wait_cb
*/
daemon_info = OBJ_NEW(rsh_daemon_info_t);
OBJ_RETAIN(ras_node);
daemon_info->node = ras_node;
daemon_info->jobid = jobid;
orte_wait_cb(pid, orte_pls_rsh_wait_daemon, daemon_info);
/* if required - add delay to avoid problems w/ X11 authentication */
if (mca_pls_rsh_component.debug && mca_pls_rsh_component.delay) {
sleep(mca_pls_rsh_component.delay);
}
vpid++;
}
free(name);
/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(0, "orte_pls_rsh: unable to create process name");
exit(-1);
}
free(argv[proc_name_index]);
argv[proc_name_index] = strdup(name_string);
if (!mca_pls_rsh_component.debug) {
/* setup stdin */
int fd = open("/dev/null", O_RDWR);
dup2(fd, 0);
close(fd);
}
/* close all file descriptors w/ exception of stdin/stdout/stderr */
for(fd=3; fd<fdmax; fd++)
close(fd);
/* Set signal handlers back to the default. Do this close
to the execve() because the event library may (and likely
will) reset them. If we don't do this, the event
library may have left some set that, at least on some
OS's, don't get reset via fork() or exec(). Hence, the
orted could be unkillable (for example). */
set_handler_default(SIGTERM);
set_handler_default(SIGINT);
set_handler_default(SIGHUP);
set_handler_default(SIGPIPE);
set_handler_default(SIGCHLD);
/* Unblock all signals, for many of the same reasons that
we set the default handlers, above. This is noticeable
on Linux where the event library blocks SIGTERM, but we
don't want that blocked by the orted (or, more
specifically, we don't want it to be blocked by the
orted and then inherited by the ORTE processes that it
forks, making them unkillable by SIGTERM). */
sigprocmask(0, 0, &sigs);
sigprocmask(SIG_UNBLOCK, &sigs, 0);
/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);
/* exec the daemon */
if (mca_pls_rsh_component.debug) {
param = opal_argv_join(exec_argv, ' ');
if (NULL != param) {
opal_output(0, "pls:rsh: executing: %s", param);
free(param);
}
}
execve(exec_path, exec_argv, env);
opal_output(0, "pls:rsh: execv failed with errno=%d\n", errno);
exit(-1);
} else { /* father */
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
/* JJH Bug:
* If we are in '--debug-daemons' we keep the ssh connection
* alive for the span of the run. If we use this option
* AND we launch on more than "num_concurrent" machines
* then we will deadlock. No connections are terminated
* until the job is complete, no job is started
* since all the orteds are waiting for all the others
to come online, and the others are not launched because
* we are waiting on those that have started to terminate
* their ssh tunnels. :(
*/
if (mca_pls_rsh_component.num_children++ >=
mca_pls_rsh_component.num_concurrent) {
opal_condition_wait(&mca_pls_rsh_component.cond, &mca_pls_rsh_component.lock);
}
OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
/* setup callback on sigchild - wait until setup above is complete
* as the callback can occur in the call to orte_wait_cb
*/
orte_wait_cb(pid, orte_pls_rsh_wait_daemon, dmn);
/* if required - add delay to avoid problems w/ X11 authentication */
if (mca_pls_rsh_component.debug && mca_pls_rsh_component.delay) {
sleep(mca_pls_rsh_component.delay);
}
vpid++;
}
free(name);
}
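    /* Editor's aside -- not part of this commit.  The throttle above,
     *
     *   if (mca_pls_rsh_component.num_children++ >=
     *       mca_pls_rsh_component.num_concurrent)
     *       opal_condition_wait(&mca_pls_rsh_component.cond,
     *                           &mca_pls_rsh_component.lock);
     *
     * is released in orte_pls_rsh_wait_daemon(), which decrements
     * num_children and wakes the waiter on the same condition variable
     * once a daemon's rsh/ssh session exits.  With --debug-daemons the
     * sessions stay alive for the whole run, so launching on more than
     * num_concurrent nodes never releases the wait and mpirun deadlocks --
     * exactly the scenario the JJH note above warns about. */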
/* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&active_daemons))) {
ORTE_ERROR_LOG(rc);
}
cleanup:
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&daemons);
/* OBJ_RELEASE(map); */
if (NULL != lib_base) {
free(lib_base);
View file
@ -59,7 +59,7 @@
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/smr/smr.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/pls/pls.h"
#include "orte/mca/pls/base/pls_private.h"
@ -109,8 +109,8 @@ extern char **environ;
static int pls_slurm_launch_job(orte_jobid_t jobid)
{
opal_list_t nodes, mapping_list;
opal_list_item_t *item, *item2;
orte_job_map_t *map;
opal_list_item_t *item;
size_t num_nodes;
orte_vpid_t vpid;
char *jobid_string;
@ -137,15 +137,13 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
*/
OBJ_CONSTRUCT(&daemons, opal_list_t);
/* Query the list of nodes allocated and mapped to this job.
/* Query the map for this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS
*/
OBJ_CONSTRUCT(&nodes, opal_list_t);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
@ -153,7 +151,7 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
/*
* Allocate a range of vpids for the daemons.
*/
num_nodes = opal_list_get_size(&nodes);
num_nodes = opal_list_get_size(&map->nodes);
if (num_nodes == 0) {
return ORTE_ERR_BAD_PARAM;
}
@ -206,12 +204,12 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
nodelist_argv = NULL;
nodelist_argc = 0;
for (item = opal_list_get_first(&nodes);
item != opal_list_get_end(&nodes);
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
opal_argv_append(&nodelist_argc, &nodelist_argv, node->node_name);
opal_argv_append(&nodelist_argc, &nodelist_argv, node->nodename);
}
nodelist_flat = opal_argv_join(nodelist_argv, ',');
asprintf(&tmp, "--nodelist=%s", nodelist_flat);
@ -308,80 +306,59 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
}
}
/* Bookkeeping -- save the node names */
/* Copy the prefix-directory specified in the
corresponding app_context. If there are multiple,
different prefix's in the app context, complain (i.e., only
allow one --prefix option for the entire slurm run -- we
don't support different --prefix'es for different nodes in
the SLURM pls) */
cur_prefix = NULL;
for (item = opal_list_get_first(&nodes);
item != opal_list_get_end(&nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
opal_list_t map;
size_t num_processes;
for (i=0; i < map->num_apps; i++) {
char * app_prefix_dir = map->apps[i]->prefix_dir;
/* Check for already set cur_prefix_dir -- if different,
complain */
if (NULL != app_prefix_dir) {
if (NULL != cur_prefix &&
0 != strcmp (cur_prefix, app_prefix_dir)) {
opal_show_help("help-pls-slurm.txt", "multiple-prefixes",
true, cur_prefix, app_prefix_dir);
return ORTE_ERR_FATAL;
}
OBJ_CONSTRUCT(&map, opal_list_t);
/* Get the mapping of this very node */
rc = orte_rmaps_base_get_node_map(orte_process_info.my_name->cellid,
jobid,
node->node_name,
&map);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* Copy the prefix-directory specified within the
corresponding app_context. If there are multiple,
different prefix's in the app context, complain (i.e., only
allow one --prefix option for the entire slurm run -- we
don't support different --prefix'es for different nodes in
the SLURM pls) */
num_processes = 0;
for (item2 = opal_list_get_first(&map);
item2 != opal_list_get_end(&map);
item2 = opal_list_get_next(item2)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item2;
char * app_prefix_dir = map->app->prefix_dir;
/* Increment the number of processes allocated to this node
* This allows us to accurately test for oversubscription */
num_processes += map->num_procs;
/* Check for already set cur_prefix_dir -- if different,
complain */
if (NULL != app_prefix_dir) {
if (NULL != cur_prefix &&
0 != strcmp (cur_prefix, app_prefix_dir)) {
opal_show_help("help-pls-slurm.txt", "multiple-prefixes",
true, cur_prefix, app_prefix_dir);
return ORTE_ERR_FATAL;
}
/* If not yet set, copy it; iff set, then it's the
same anyway */
if (NULL == cur_prefix) {
cur_prefix = strdup(map->app->prefix_dir);
if (mca_pls_slurm_component.debug) {
opal_output (0, "pls:slurm: Set prefix:%s",
cur_prefix);
}
/* If not yet set, copy it; iff set, then it's the
same anyway */
if (NULL == cur_prefix) {
cur_prefix = strdup(app_prefix_dir);
if (mca_pls_slurm_component.debug) {
opal_output (0, "pls:slurm: Set prefix:%s",
cur_prefix);
}
}
}
}
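    /* Editor's illustration -- not part of this commit.  The check above
     * is what rejects an invocation (hypothetical) such as
     *
     *   mpirun --prefix /opt/ompiA -np 2 a.out : --prefix /opt/ompiB -np 2 b.out
     *
     * under the slurm pls, since the two app_contexts then carry different
     * prefix_dir values and the "multiple-prefixes" help message fires. */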
/* setup the daemon info for each node */
vpid = 0;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
/* record the daemons info for this node */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->cell = node->node_cellid;
dmn->nodename = strdup(node->node_name);
if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(dmn->name), node->node_cellid, 0, vpid))) {
dmn->active_job = jobid;
dmn->cell = node->cell;
dmn->nodename = strdup(node->nodename);
if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(dmn->name), dmn->cell, 0, vpid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
opal_list_append(&daemons, &dmn->super);
vpid++;
}
/* store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
@ -390,29 +367,6 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
var = mca_base_param_environ_variable("seed", NULL, NULL);
opal_setenv(var, "0", true, &env);
#if 0
/* JMS What to do for sched_yield? */
/* set the progress engine schedule for this node. if node_slots
is set to zero, then we default to NOT being oversubscribed */
if (node->node_slots > 0 &&
num_processes > node->node_slots) {
if (mca_pls_slurm_component.debug) {
opal_output(0, "pls:slurm: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
node->node_slots, num_processes);
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "1", true, &env);
} else {
if (mca_pls_slurm_component.debug) {
opal_output(0, "pls:slurm: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "0", true, &env);
}
free(var);
#endif
/* exec the daemon */
rc = pls_slurm_start_proc(argc, argv, env, cur_prefix);
if (ORTE_SUCCESS != rc) {
@ -424,16 +378,6 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
/* JMS: how do we catch when srun dies? */
cleanup:
while (NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
while (NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);
while (NULL != (item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(item);
}
View file
@ -58,12 +58,10 @@
#include "orte/mca/smr/smr.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/ns/ns.h"
/* needs to be cleaned up for ORTE 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/pls/base/pls_private.h"
#include "pls_tm.h"
@ -118,14 +116,16 @@ extern char **environ;
static int pls_tm_launch_job(orte_jobid_t jobid)
{
opal_list_t mapping;
opal_list_item_t *m_item, *n_item;
orte_job_map_t *map;
opal_list_item_t *item;
size_t num_nodes;
orte_vpid_t vpid;
int node_name_index;
int proc_name_index;
char *jobid_string;
char *uri, *param;
char **env;
char *var;
char **argv;
int argc;
int rc;
@ -139,24 +139,17 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
opal_list_t daemons;
orte_pls_daemon_info_t *dmn;
/* Query the list of nodes allocated and mapped to this job.
/* Query the map for this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
*/
OBJ_CONSTRUCT(&mapping, opal_list_t);
rc = orte_rmaps_base_get_map(jobid, &mapping);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
num_nodes = 0;
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
num_nodes = opal_list_get_size(&map->nodes);
/*
* Allocate a range of vpids for the daemons.
@ -286,174 +279,139 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
lib_base = opal_basename(OPAL_LIBDIR);
bin_base = opal_basename(OPAL_BINDIR);
/*
* iterate through each of the contexts
*/
for (m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
char** env;
char* var;
/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);
/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. */
if (NULL != map->app->prefix_dir) {
char *newenv;
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. We only allow
a single prefix to be specified. Since there will
always be at least one app_context, we take it from
there
*/
if (NULL != map->apps[0]->prefix_dir) {
char *newenv;
for (i = 0; NULL != env && NULL != env[i]; ++i) {
/* Reset PATH */
if (0 == strncmp("PATH=", env[i], 5)) {
asprintf(&newenv, "%s/%s:%s",
map->apps[0]->prefix_dir, bin_base, env[i] + 5);
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: resetting PATH: %s",
newenv);
}
opal_setenv("PATH", newenv, true, &env);
free(newenv);
}
for (i = 0; NULL != env && NULL != env[i]; ++i) {
/* Reset PATH */
if (0 == strncmp("PATH=", env[i], 5)) {
asprintf(&newenv, "%s/%s:%s",
map->app->prefix_dir, bin_base, env[i] + 5);
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: resetting PATH: %s",
newenv);
}
opal_setenv("PATH", newenv, true, &env);
free(newenv);
}
/* Reset LD_LIBRARY_PATH */
else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) {
asprintf(&newenv, "%s/%s:%s",
map->app->prefix_dir, lib_base, env[i] + 16);
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: resetting LD_LIBRARY_PATH: %s",
newenv);
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
free(newenv);
}
}
/* Reset LD_LIBRARY_PATH */
else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) {
asprintf(&newenv, "%s/%s:%s",
map->apps[0]->prefix_dir, lib_base, env[i] + 16);
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: resetting LD_LIBRARY_PATH: %s",
newenv);
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
free(newenv);
}
}
}
/* Do a quick sanity check to ensure that we can find the
orted in the PATH */
if (ORTE_SUCCESS !=
(rc = pls_tm_check_path(argv[0], env))) {
ORTE_ERROR_LOG(rc);
opal_show_help("help-pls-tm.txt", "daemon-not-found",
true, argv[0]);
goto cleanup;
}
/* Do a quick sanity check to ensure that we can find the
orted in the PATH */
/* Iterate through each of the nodes and spin
* up a daemon.
*/
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
orte_process_name_t* name;
char* name_string;
if (ORTE_SUCCESS !=
(rc = pls_tm_check_path(argv[0], env))) {
/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;
opal_list_append(&daemons, &dmn->super);
/* setup node name */
free(argv[node_name_index]);
argv[node_name_index] = strdup(node->nodename);
/* record the node name in the daemon struct */
dmn->cell = node->cell;
dmn->nodename = strdup(node->nodename);
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
opal_show_help("help-pls-tm.txt", "daemon-not-found",
true, argv[0]);
goto cleanup;
}
/* Iterate through each of the nodes and spin
* up a daemon.
*/
for (n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
orte_ras_node_t* node = rmaps_node->node;
orte_process_name_t* name;
char* name_string;
/* already launched on this node */
if (0 != node->node_launched++) {
continue;
}
/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;
opal_list_append(&daemons, &dmn->super);
/* setup node name */
free(argv[node_name_index]);
argv[node_name_index] = strdup(node->node_name);
/* record the node name in the daemon struct */
dmn->cell = node->node_cellid;
dmn->nodename = strdup(node->node_name);
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* save it in the daemon struct */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* setup per-node options */
if (mca_pls_tm_component.debug ||
mca_pls_tm_component.verbose) {
opal_output(0, "pls:tm: launching on node %s",
node->node_name);
}
/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:tm: unable to create process name");
return rc;
}
free(argv[proc_name_index]);
argv[proc_name_index] = strdup(name_string);
/* set the progress engine schedule for this node.
* if node_slots is set to zero, then we default to
* NOT being oversubscribed
*/
if (node->node_slots > 0 &&
(orte_std_cntr_t)opal_list_get_size(&rmaps_node->node_procs) > node->node_slots) {
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
node->node_slots,
opal_list_get_size(&rmaps_node->node_procs));
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "1", true, &env);
} else {
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "0", true, &env);
}
free(var);
/* exec the daemon */
if (mca_pls_tm_component.debug) {
param = opal_argv_join(argv, ' ');
if (NULL != param) {
opal_output(0, "pls:tm: executing: %s", param);
free(param);
}
}
rc = pls_tm_start_proc(node->node_name, argc, argv, env,
tm_task_ids + launched,
tm_events + launched);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:tm: start_procs returned error %d", rc);
goto cleanup;
}
launched++;
++vpid;
free(name);
/* Allow some progress to occur */
opal_event_loop(OPAL_EVLOOP_NONBLOCK);
/* save it in the daemon struct */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* setup per-node options */
if (mca_pls_tm_component.debug ||
mca_pls_tm_component.verbose) {
opal_output(0, "pls:tm: launching on node %s",
node->nodename);
}
/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:tm: unable to create process name");
return rc;
}
free(argv[proc_name_index]);
argv[proc_name_index] = strdup(name_string);
/* exec the daemon */
if (mca_pls_tm_component.debug) {
param = opal_argv_join(argv, ' ');
if (NULL != param) {
opal_output(0, "pls:tm: executing: %s", param);
free(param);
}
}
rc = pls_tm_start_proc(node->nodename, argc, argv, env,
tm_task_ids + launched,
tm_events + launched);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:tm: start_procs returned error %d", rc);
goto cleanup;
}
launched++;
++vpid;
free(name);
/* Allow some progress to occur */
opal_event_loop(OPAL_EVLOOP_NONBLOCK);
}
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm:launch: finished spawning orteds\n");
}
/* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
@ -478,10 +436,6 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
free(tm_task_ids);
}
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
if (NULL != lib_base) {
free(lib_base);
}
@ -490,8 +444,8 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
}
/* deconstruct the daemon list */
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);
while (NULL != (item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&daemons);
View file
@ -30,7 +30,7 @@
#import "orte/mca/pls/pls.h"
#import "orte/mca/errmgr/errmgr.h"
#import "orte/mca/ras/ras_types.h"
#import "orte/mca/rmaps/base/rmaps_private.h"
#import "orte/mca/rmaps/rmaps.h"
#import "orte/mca/smr/smr.h"
#import "pls_xgrid_client.h"
@ -229,8 +229,8 @@ char **environ;
-(int) launchJob:(orte_jobid_t) jobid
{
opal_list_t mapping;
opal_list_item_t *m_item, *n_item;
orte_job_map_t *map;
opal_list_item_t *item;
size_t num_nodes;
orte_vpid_t vpid;
int rc, i = 0;
@ -239,24 +239,17 @@ char **environ;
char *orted_path;
char *nsuri = NULL, *gpruri = NULL;
/* Query the list of nodes allocated and mapped to this job.
/* Query the map for this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
*/
OBJ_CONSTRUCT(&mapping, opal_list_t);
rc = orte_rmaps_base_get_map(jobid, &mapping);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
num_nodes = 0;
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
num_nodes = opal_list_get_size(&map->nodes);
/*
* Allocate a range of vpids for the daemons.
@ -300,84 +293,69 @@ char **environ;
/* build up the array of task specifications */
NSMutableDictionary *taskSpecifications = [NSMutableDictionary dictionary];
/*
* iterate through each of the contexts
/* Iterate through each of the nodes and spin
* up a daemon.
*/
for (m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
orte_process_name_t* name;
char* name_string;
/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;
opal_list_append(&daemons, &dmn->super);
/* record the node name in the daemon struct */
dmn->cell = node->cell;
dmn->nodename = strdup(node->nodename);
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* save it in the daemon struct */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* setup per-node options */
opal_output_verbose(1, orte_pls_base.pls_output,
"orte:pls:xgrid: launching on node %s",
node->nodename);
/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(orte_pls_base.pls_output,
"orte:pls:xgrid: unable to create process name");
return rc;
}
/* Iterate through each of the nodes and spin
* up a daemon.
*/
for (n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
orte_ras_node_t* node = rmaps_node->node;
orte_process_name_t* name;
char* name_string;
/* already launched on this node */
if (0 != node->node_launched++) {
continue;
}
/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;
opal_list_append(&daemons, &dmn->super);
/* record the node name in the daemon struct */
dmn->cell = node->node_cellid;
dmn->nodename = strdup(node->node_name);
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* save it in the daemon struct */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* setup per-node options */
opal_output_verbose(1, orte_pls_base.pls_output,
"orte:pls:xgrid: launching on node %s",
node->node_name);
/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(orte_pls_base.pls_output,
"orte:pls:xgrid: unable to create process name");
return rc;
}
NSMutableDictionary *task = [NSMutableDictionary dictionary];
[task setObject: [NSString stringWithCString: orted_path]
forKey: XGJobSpecificationCommandKey];
NSArray *taskArguments =
[NSArray arrayWithObjects: @"--no-daemonize",
@"--bootproxy", [NSString stringWithFormat: @"%d", jobid],
@"--name", [NSString stringWithCString: name_string],
@"--num_procs", [NSString stringWithFormat: @"%d", 1],
@"--nodename", [NSString stringWithCString: node->node_name],
@"--nsreplica", [NSString stringWithCString: nsuri],
@"--gprreplica", [NSString stringWithCString: gpruri],
nil];
[task setObject: taskArguments forKey: XGJobSpecificationArgumentsKey];
[taskSpecifications setObject: task
forKey: [NSString stringWithFormat: @"%d", i]];
vpid++; i++;
}
NSMutableDictionary *task = [NSMutableDictionary dictionary];
[task setObject: [NSString stringWithCString: orted_path]
forKey: XGJobSpecificationCommandKey];
NSArray *taskArguments =
[NSArray arrayWithObjects: @"--no-daemonize",
@"--bootproxy", [NSString stringWithFormat: @"%d", jobid],
@"--name", [NSString stringWithCString: name_string],
@"--num_procs", [NSString stringWithFormat: @"%d", 1],
@"--nodename", [NSString stringWithCString: node->nodename],
@"--nsreplica", [NSString stringWithCString: nsuri],
@"--gprreplica", [NSString stringWithCString: gpruri],
nil];
[task setObject: taskArguments forKey: XGJobSpecificationArgumentsKey];
[taskSpecifications setObject: task
forKey: [NSString stringWithFormat: @"%d", i]];
vpid++; i++;
}
/* job specification */
@ -419,7 +397,7 @@ char **environ;
forKey: [NSString stringWithFormat: @"%d", jobid]];
/* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
@ -427,11 +405,6 @@ cleanup:
if (NULL != nsuri) free(nsuri);
if (NULL != gpruri) free(gpruri);
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
/* deconstruct the daemon list */
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);
View file
@ -20,14 +20,14 @@ dist_pkgdata_DATA = base/help-orte-rmaps-base.txt
headers += \
base/base.h \
base/rmaps_class_instances.h \
base/rmaps_private.h
libmca_rmaps_la_SOURCES += \
base/rmaps_base_close.c \
base/rmaps_base_map.c \
base/rmaps_base_registry_fns.c \
base/rmaps_base_map_job.c \
base/rmaps_base_node.c \
base/rmaps_base_no_ops.c \
base/rmaps_base_support_fns.c \
base/rmaps_base_open.c \
base/rmaps_base_receive.c \
base/rmaps_base_find_avail.c \
View file
@ -29,14 +29,14 @@
/*
* JOB_MAP
*/
int orte_rmaps_base_compare_map(orte_rmaps_base_map_t *value1, orte_rmaps_base_map_t *value2, orte_data_type_t type)
int orte_rmaps_base_compare_map(orte_job_map_t *value1, orte_job_map_t *value2, orte_data_type_t type)
{
return ORTE_EQUAL;
}
/* MAPPED_PROC */
int orte_rmaps_base_compare_mapped_proc(orte_rmaps_base_proc_t *value1, orte_rmaps_base_proc_t *value2, orte_data_type_t type)
int orte_rmaps_base_compare_mapped_proc(orte_mapped_proc_t *value1, orte_mapped_proc_t *value2, orte_data_type_t type)
{
return ORTE_EQUAL;
}
@ -44,7 +44,7 @@ int orte_rmaps_base_compare_mapped_proc(orte_rmaps_base_proc_t *value1, orte_rma
/* MAPPED_NODE */
int orte_rmaps_base_compare_mapped_node(orte_rmaps_base_node_t *value1, orte_rmaps_base_node_t *value2, orte_data_type_t type)
int orte_rmaps_base_compare_mapped_node(orte_mapped_node_t *value1, orte_mapped_node_t *value2, orte_data_type_t type)
{
return ORTE_EQUAL;
}
View file
@ -34,12 +34,12 @@
/*
* JOB_MAP
*/
int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t *src, orte_data_type_t type)
int orte_rmaps_base_copy_map(orte_job_map_t **dest, orte_job_map_t *src, orte_data_type_t type)
{
orte_std_cntr_t i;
int rc;
opal_list_item_t *item;
orte_rmaps_base_node_t *srcnode, *nodeptr;
orte_mapped_node_t *srcnode, *nodeptr;
if (NULL == src) {
*dest = NULL;
@ -47,34 +47,34 @@ int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t
}
/* create the new object */
*dest = OBJ_NEW(orte_rmaps_base_map_t);
*dest = OBJ_NEW(orte_job_map_t);
if (NULL == *dest) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* copy data into it */
(*dest)->app = src->app;
(*dest)->job = src->job;
(*dest)->num_apps = src->num_apps;
(*dest)->procs = (orte_rmaps_base_proc_t**)malloc(src->num_procs * sizeof(orte_rmaps_base_proc_t));
if (NULL == (*dest)->procs) {
(*dest)->apps = (orte_app_context_t**)malloc(src->num_apps * sizeof(orte_app_context_t*));
if (NULL == (*dest)->apps) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(*dest);
return ORTE_ERR_OUT_OF_RESOURCE;
}
for (i=0; i < src->num_procs; i++) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_proc(&((*dest)->procs[i]), src->procs[i], ORTE_MAPPED_PROC))) {
for (i=0; i < src->num_apps; i++) {
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((*dest)->apps[i]), src->apps[i], ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest);
return rc;
}
}
(*dest)->num_procs = src->num_procs;
for (item = opal_list_get_first(&(src->nodes));
item != opal_list_get_end(&(src->nodes));
item = opal_list_get_next(item)) {
srcnode = (orte_rmaps_base_node_t*)item;
srcnode = (orte_mapped_node_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_node(&nodeptr, srcnode, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest);
@ -89,52 +89,40 @@ int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t
/*
* MAPPED_PROC
*/
int orte_rmaps_base_copy_mapped_proc(orte_rmaps_base_proc_t **dest, orte_rmaps_base_proc_t *src, orte_data_type_t type)
int orte_rmaps_base_copy_mapped_proc(orte_mapped_proc_t **dest, orte_mapped_proc_t *src, orte_data_type_t type)
{
int rc;
if (NULL == src) {
*dest = NULL;
return ORTE_SUCCESS;
}
/* create the new object */
*dest = OBJ_NEW(orte_rmaps_base_proc_t);
*dest = OBJ_NEW(orte_mapped_proc_t);
if (NULL == *dest) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* copy data into it */
if (NULL != src->app) {
(*dest)->app = strdup(src->app);
}
(*dest)->name = src->name;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_node(&((*dest)->proc_node), src->proc_node, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest);
return rc;
}
(*dest)->rank = src->rank;
(*dest)->proc_name = src->proc_name;
(*dest)->proc_rank = src->proc_rank;
(*dest)->app_idx = src->app_idx;
(*dest)->pid = src->pid;
(*dest)->local_pid = src->local_pid;
return ORTE_SUCCESS;
}
/*
* MAPPED_NODE
*/
int orte_rmaps_base_copy_mapped_node(orte_rmaps_base_node_t **dest, orte_rmaps_base_node_t *src, orte_data_type_t type)
int orte_rmaps_base_copy_mapped_node(orte_mapped_node_t **dest, orte_mapped_node_t *src, orte_data_type_t type)
{
int rc;
opal_list_item_t *item;
orte_rmaps_base_proc_t *srcproc, *procptr;
orte_mapped_proc_t *srcproc, *procptr;
if (NULL == src) {
*dest = NULL;
@ -142,29 +130,43 @@ int orte_rmaps_base_copy_mapped_node(orte_rmaps_base_node_t **dest, orte_rmaps_b
}
/* create the new object */
*dest = OBJ_NEW(orte_rmaps_base_node_t);
*dest = OBJ_NEW(orte_mapped_node_t);
if (NULL == *dest) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* copy data into it */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((*dest)->node), src->node, ORTE_RAS_NODE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest);
return rc;
(*dest)->cell = src->cell;
if (NULL != src->nodename) {
(*dest)->nodename = strdup(src->nodename);
}
if (NULL != src->username) {
(*dest)->username = strdup(src->username);
}
for (item = opal_list_get_first(&(src->node_procs));
item != opal_list_get_end(&(src->node_procs));
if (NULL != src->daemon) {
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((*dest)->daemon), src->daemon, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest);
return rc;
}
}
(*dest)->oversubscribed = src->oversubscribed;
for (item = opal_list_get_first(&(src->procs));
item != opal_list_get_end(&(src->procs));
item = opal_list_get_next(item)) {
srcproc = (orte_rmaps_base_proc_t*)item;
srcproc = (orte_mapped_proc_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_proc(&procptr, srcproc, ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest);
return rc;
}
opal_list_append(&((*dest)->node_procs), &procptr->super);
opal_list_append(&((*dest)->procs), &procptr->super);
}
return ORTE_SUCCESS;
View file
@ -34,33 +34,32 @@
* JOB_MAP
*/
int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type)
orte_std_cntr_t num_vals, orte_data_type_t type)
{
int rc;
orte_std_cntr_t i, num_nodes;
orte_rmaps_base_map_t **maps;
orte_job_map_t **maps;
opal_list_item_t *item;
orte_rmaps_base_node_t *srcnode;
orte_mapped_node_t *srcnode;
/* array of pointers to orte_rmaps_base_map_t objects - need to pack the objects a set of fields at a time */
maps = (orte_rmaps_base_map_t**) src;
/* array of pointers to orte_job_map_t objects - need to pack the objects a set of fields at a time */
maps = (orte_job_map_t**) src;
for (i=0; i < num_vals; i++) {
/* pack the app_context */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, maps[i]->app, 1, ORTE_APP_CONTEXT))) {
/* pack the jobid this map is for */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->job), 1, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the number of procs */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->num_procs), 1, ORTE_STD_CNTR))) {
/* pack the number of app_contexts */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->num_apps), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the procs array */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(maps[i]->procs),
maps[i]->num_procs, ORTE_MAPPED_PROC))) {
/* pack the app_contexts */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, maps[i]->apps, maps[i]->num_apps, ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -77,7 +76,7 @@ int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
for (item = opal_list_get_first(&(maps[i]->nodes));
item != opal_list_get_end(&(maps[i]->nodes));
item = opal_list_get_next(item)) {
srcnode = (orte_rmaps_base_node_t*)item;
srcnode = (orte_mapped_node_t*)item;
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)srcnode,
1, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
@ -99,45 +98,33 @@ int orte_rmaps_base_pack_mapped_proc(orte_buffer_t *buffer, void *src,
{
int rc;
orte_std_cntr_t i;
orte_rmaps_base_proc_t **procs;
orte_mapped_proc_t **procs;
/* array of pointers to orte_rmaps_base_proc_t objects - need to pack the objects a set of fields at a time */
procs = (orte_rmaps_base_proc_t**) src;
/* array of pointers to orte_mapped_proc_t objects - need to pack the objects a set of fields at a time */
procs = (orte_mapped_proc_t**) src;
for (i=0; i < num_vals; i++) {
/* pack the app */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, procs[i]->app, 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the proc_node */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, procs[i]->proc_node, 1, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the proc name */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(&(procs[i]->proc_name)),
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(&(procs[i]->name)),
1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the rank */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->proc_rank), 1, ORTE_STD_CNTR))) {
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->rank), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the pls-pid */
/* pack the pid */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->pid), 1, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the local pid */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->local_pid), 1, ORTE_PID))) {
/* pack the app_idx */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->app_idx), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -155,22 +142,46 @@ int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
{
int rc;
orte_std_cntr_t i, num_procs;
orte_rmaps_base_node_t **nodes;
orte_mapped_node_t **nodes;
opal_list_item_t *item;
orte_rmaps_base_proc_t *srcproc;
orte_mapped_proc_t *srcproc;
/* array of pointers to orte_rmaps_base_node_t objects - need to pack the objects a set of fields at a time */
nodes = (orte_rmaps_base_node_t**) src;
/* array of pointers to orte_mapped_node_t objects - need to pack the objects a set of fields at a time */
nodes = (orte_mapped_node_t**) src;
for (i=0; i < num_vals; i++) {
/* pack the node object */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, nodes[i]->node, 1, ORTE_RAS_NODE))) {
/* pack the cellid */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->cell), 1, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the nodename */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->nodename), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the username */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->username), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the daemon's name */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->daemon), 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the oversubscribed flag */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->oversubscribed), 1, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the number of procs */
num_procs = (orte_std_cntr_t)opal_list_get_size(&(nodes[i]->node_procs));
num_procs = (orte_std_cntr_t)opal_list_get_size(&(nodes[i]->procs));
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &num_procs, 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -178,10 +189,10 @@ int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
/* pack the procs list */
if (0 < num_procs) {
for (item = opal_list_get_first(&(nodes[i]->node_procs));
item != opal_list_get_end(&(nodes[i]->node_procs));
for (item = opal_list_get_first(&(nodes[i]->procs));
item != opal_list_get_end(&(nodes[i]->procs));
item = opal_list_get_next(item)) {
srcproc = (orte_rmaps_base_proc_t*)item;
srcproc = (orte_mapped_proc_t*)item;
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)srcproc,
1, ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc);

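The pack routines above establish a fixed field order for ORTE_MAPPED_NODE (cell, nodename, username, daemon name, oversubscribed flag, proc count, then the procs list), which the unpack routines further below must mirror exactly. A minimal round-trip sketch, assuming the standard orte_dss.pack/orte_dss.unpack entry points and an approximate include set; the helper itself is hypothetical and not part of this commit:
/* Hypothetical sketch only: pack one orte_mapped_node_t and read it back.
 * Assumes orte_dss.pack/unpack dispatch to the ORTE_MAPPED_NODE routines above. */
#include <stdlib.h>
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/dss/dss.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
static int roundtrip_mapped_node(orte_mapped_node_t *src)
{
    orte_buffer_t *buf;
    orte_mapped_node_t *copy = NULL;
    orte_std_cntr_t count = 1;
    int rc;
    if (NULL == (buf = OBJ_NEW(orte_buffer_t))) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    /* writes: cell, nodename, username, daemon, oversubscribed, num_procs, procs */
    if (ORTE_SUCCESS != (rc = orte_dss.pack(buf, &src, 1, ORTE_MAPPED_NODE))) {
        OBJ_RELEASE(buf);
        return rc;
    }
    /* reads the same fields back, in the same order */
    if (ORTE_SUCCESS != (rc = orte_dss.unpack(buf, &copy, &count, ORTE_MAPPED_NODE))) {
        OBJ_RELEASE(buf);
        return rc;
    }
    OBJ_RELEASE(buf);
    OBJ_RELEASE(copy);
    return ORTE_SUCCESS;
}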
@ -34,10 +34,10 @@
/*
* JOB_MAP
*/
int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t *src, orte_data_type_t type)
int orte_rmaps_base_print_map(char **output, char *prefix, orte_job_map_t *src, orte_data_type_t type)
{
char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
orte_rmaps_base_node_t *srcnode;
orte_mapped_node_t *srcnode;
orte_std_cntr_t i, num_nodes;
opal_list_item_t *item;
int rc;
@ -52,32 +52,22 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "%sMap for app_context:", pfx2);
asprintf(&tmp, "%sMap for job: %ld\tNum app_contexts: %ld", pfx2, (long)src->job, (long)src->num_apps);
asprintf(&pfx, "%s\t", pfx2);
free(pfx2);
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->app, ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc);
free(pfx);
free(tmp);
return rc;
}
asprintf(&tmp3, "%s\n%s\n%sNum elements in procs array: %ld", tmp, tmp2, pfx, (long)src->num_procs);
free(tmp);
free(tmp2);
for (i=0; i < src->num_procs; i++) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_proc(&tmp, pfx, src->procs[i], ORTE_MAPPED_PROC))) {
for (i=0; i < src->num_apps; i++) {
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->apps[i], ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc);
free(pfx);
free(tmp3);
free(tmp);
return rc;
}
asprintf(&tmp2, "%s\n%s", tmp3, tmp);
asprintf(&tmp3, "%s\n%s", tmp, tmp2);
free(tmp);
free(tmp3);
tmp3 = tmp2;
free(tmp2);
tmp = tmp3;
}
num_nodes = (orte_std_cntr_t)opal_list_get_size(&(src->nodes));
@ -86,7 +76,7 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t
for (item = opal_list_get_first(&(src->nodes));
item != opal_list_get_end(&(src->nodes));
item = opal_list_get_next(item)) {
srcnode = (orte_rmaps_base_node_t*)item;
srcnode = (orte_mapped_node_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_node(&tmp2, pfx, srcnode, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
free(pfx);
@ -110,7 +100,7 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t
/*
* MAPPED_PROC
*/
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_base_proc_t *src, orte_data_type_t type)
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_mapped_proc_t *src, orte_data_type_t type)
{
char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
int rc;
@ -125,35 +115,18 @@ int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_ba
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "%sMapped proc:", pfx2);
asprintf(&tmp3, "%sMapped proc:\n%s\tProc Name:", pfx2, pfx2);
asprintf(&pfx, "%s\t", pfx2);
if (NULL != src->app) {
asprintf(&tmp2, "%s\n%sApp name: %s", tmp, pfx, src->app);
} else {
asprintf(&tmp2, "%s\n%sApplication has NULL name", tmp, pfx);
}
free(tmp);
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_node(&tmp, pfx, src->proc_node, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
free(pfx);
free(tmp2);
return rc;
}
asprintf(&tmp3, "%s\n%s\n%s\n%sProc Name:", tmp2, pfx, tmp, pfx);
free(tmp2);
free(tmp);
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, &(src->proc_name), ORTE_NAME))) {
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, &(src->name), ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
free(pfx);
free(tmp3);
return rc;
}
asprintf(&tmp, "%s\n%s\n%sProc Rank: %ld\tPLS pid: %ld\tLocal PID: %ld\n", tmp3, tmp2, pfx,
(long)src->proc_rank, (long)src->pid, (long)src->local_pid);
asprintf(&tmp, "%s\n%s\n%sProc Rank: %ld\tProc PID: %ld\tApp_context index: %ld\n", tmp3, tmp2, pfx,
(long)src->rank, (long)src->pid, (long)src->app_idx);
free(tmp2);
free(tmp3);
@ -168,15 +141,13 @@ int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_ba
/*
* MAPPED_NODE
*/
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_base_node_t *src, orte_data_type_t type)
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_node_t *src, orte_data_type_t type)
{
int rc;
char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
orte_std_cntr_t num_procs;
#if 0
opal_list_item_t *item;
orte_rmaps_base_proc_t *srcproc;
#endif
orte_mapped_proc_t *srcproc;
/* set default result */
*output = NULL;
@ -187,28 +158,31 @@ int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_ba
} else {
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "%sMapped node:", pfx2);
asprintf(&tmp, "%sMapped node:\n%s\tCell: %ld\tNodename: %s\tUsername: %s\n%s\tDaemon name:", pfx2, pfx2,
(long)src->cell, (NULL == src->nodename ? "NULL" : src->nodename),
(NULL == src->username ? "NULL" : src->username), pfx2);
asprintf(&pfx, "%s\t", pfx2);
free(pfx2);
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->node, ORTE_RAS_NODE))) {
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->daemon, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
free(pfx);
free(tmp);
return rc;
}
num_procs = (orte_std_cntr_t)opal_list_get_size(&(src->node_procs));
asprintf(&tmp3, "%s\n%s\n%sNum elements in procs list: %ld", tmp, tmp2, pfx, (long)num_procs);
num_procs = (orte_std_cntr_t)opal_list_get_size(&(src->procs));
asprintf(&tmp3, "%s\n\t%s\n%sOversubscribed: %s\tNum elements in procs list: %ld", tmp, tmp2, pfx,
(src->oversubscribed ? "True" : "False"), (long)num_procs);
free(tmp);
free(tmp2);
#if 0
for (item = opal_list_get_first(&(src->node_procs));
item != opal_list_get_end(&(src->node_procs));
for (item = opal_list_get_first(&(src->procs));
item != opal_list_get_end(&(src->procs));
item = opal_list_get_next(item)) {
srcproc = (orte_rmaps_base_proc_t*)item;
srcproc = (orte_mapped_proc_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_proc(&tmp2, pfx, srcproc, ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc);
free(pfx);
@ -220,7 +194,7 @@ int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_ba
free(tmp2);
tmp3 = tmp;
}
#endif
/* set the return */
*output = tmp3;

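A small usage sketch for the print routine above, with made-up field values and an approximate include set; the helper is illustrative only, not part of this commit:
/* Hypothetical sketch only: build a throw-away orte_mapped_proc_t and dump it */
#include <stdlib.h>
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/util/output.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
static void dump_example_proc(void)
{
    orte_mapped_proc_t *proc;
    char *txt = NULL;
    if (NULL == (proc = OBJ_NEW(orte_mapped_proc_t))) {
        return;
    }
    proc->rank = 0;        /* example values only */
    proc->app_idx = 0;
    if (ORTE_SUCCESS == orte_rmaps_base_print_mapped_proc(&txt, "\t", proc, ORTE_MAPPED_PROC)) {
        opal_output(0, "%s", txt);
        free(txt);
    }
    OBJ_RELEASE(proc);
}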
@ -32,10 +32,10 @@
/*
* JOB_MAP
*/
int orte_rmaps_base_size_map(size_t *size, orte_rmaps_base_map_t *src, orte_data_type_t type)
int orte_rmaps_base_size_map(size_t *size, orte_job_map_t *src, orte_data_type_t type)
{
/* account for the object itself */
*size = sizeof(orte_rmaps_base_map_t);
*size = sizeof(orte_job_map_t);
/* if src is NULL, then that's all we wanted */
if (NULL == src) return ORTE_SUCCESS;
@ -46,10 +46,10 @@ int orte_rmaps_base_size_map(size_t *size, orte_rmaps_base_map_t *src, orte_data
/*
* MAPPED_PROC
*/
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_rmaps_base_proc_t *src, orte_data_type_t type)
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_mapped_proc_t *src, orte_data_type_t type)
{
/* account for the object itself */
*size = sizeof(orte_rmaps_base_proc_t);
*size = sizeof(orte_mapped_proc_t);
/* if src is NULL, then that's all we wanted */
if (NULL == src) return ORTE_SUCCESS;
@ -60,10 +60,10 @@ int orte_rmaps_base_size_mapped_proc(size_t *size, orte_rmaps_base_proc_t *src,
/*
* MAPPED_NODE
*/
int orte_rmaps_base_size_mapped_node(size_t *size, orte_rmaps_base_node_t *src, orte_data_type_t type)
int orte_rmaps_base_size_mapped_node(size_t *size, orte_mapped_node_t *src, orte_data_type_t type)
{
/* account for the object itself */
*size = sizeof(orte_rmaps_base_node_t);
*size = sizeof(orte_mapped_node_t);
/* if src is NULL, then that's all we wanted */
if (NULL == src) return ORTE_SUCCESS;

@ -40,50 +40,49 @@ int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest,
{
int rc;
orte_std_cntr_t i, j, n, num_nodes;
orte_rmaps_base_map_t **maps;
orte_rmaps_base_node_t *node;
orte_job_map_t **maps;
orte_mapped_node_t *node;
/* unpack into array of orte_rmaps_base_map_t objects */
maps = (orte_rmaps_base_map_t**) dest;
/* unpack into array of orte_job_map_t objects */
maps = (orte_job_map_t**) dest;
for (i=0; i < *num_vals; i++) {
/* create the orte_rmaps_base_map_t object */
maps[i] = OBJ_NEW(orte_rmaps_base_map_t);
maps[i] = OBJ_NEW(orte_job_map_t);
if (NULL == maps[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* unpack the jobid */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->job), &n, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the number of app_contexts */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->num_apps), &n, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* allocate space for them */
maps[i]->apps = (orte_app_context_t**)malloc(maps[i]->num_apps * sizeof(orte_app_context_t*));
if (NULL == maps[i]->apps) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* unpack the app_context */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->app), &n, ORTE_APP_CONTEXT))) {
&(maps[i]->apps), &(maps[i]->num_apps), ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the number of procs */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->num_procs), &n, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if we have some, allocate space for them */
if (0 < maps[i]->num_procs) {
maps[i]->procs = (orte_rmaps_base_proc_t**)malloc(maps[i]->num_procs * sizeof(orte_rmaps_base_proc_t*));
if (NULL == maps[i]->procs) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* and unpack them */
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, maps[i]->procs, &(maps[i]->num_procs), ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
/* unpack the number of nodes */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &num_nodes, &n, ORTE_STD_CNTR))) {
@ -112,39 +111,23 @@ int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
{
int rc;
orte_std_cntr_t i, n;
orte_rmaps_base_proc_t **procs;
orte_mapped_proc_t **procs;
/* unpack into array of orte_rmaps_base_proc_t objects */
procs = (orte_rmaps_base_proc_t**) dest;
/* unpack into array of orte_mapped_proc_t objects */
procs = (orte_mapped_proc_t**) dest;
for (i=0; i < *num_vals; i++) {
/* create the orte_rmaps_base_proc_t object */
procs[i] = OBJ_NEW(orte_rmaps_base_proc_t);
/* create the orte_mapped_proc_t object */
procs[i] = OBJ_NEW(orte_mapped_proc_t);
if (NULL == procs[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* unpack the app name */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->app), &n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the proc_node */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->proc_node), &n, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the proc name */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->proc_name), &n, ORTE_NAME))) {
&(procs[i]->name), &n, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -152,12 +135,12 @@ int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
/* unpack the rank */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->proc_rank), &n, ORTE_STD_CNTR))) {
&(procs[i]->rank), &n, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the pls-pid */
/* unpack the pid */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->pid), &n, ORTE_PID))) {
@ -165,10 +148,10 @@ int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
return rc;
}
/* unpack the local pid */
/* unpack the app_idx */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->local_pid), &n, ORTE_PID))) {
&(procs[i]->app_idx), &n, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -185,24 +168,56 @@ int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
{
int rc;
orte_std_cntr_t i, j, n, num_procs;
orte_rmaps_base_node_t **nodes;
orte_rmaps_base_proc_t *srcproc;
orte_mapped_node_t **nodes;
orte_mapped_proc_t *srcproc;
/* unpack into array of orte_rmaps_base_node_t objects */
nodes = (orte_rmaps_base_node_t**) dest;
/* unpack into array of orte_mapped_node_t objects */
nodes = (orte_mapped_node_t**) dest;
for (i=0; i < *num_vals; i++) {
/* create the orte_rmaps_base_node_t object */
nodes[i] = OBJ_NEW(orte_rmaps_base_node_t);
nodes[i] = OBJ_NEW(orte_mapped_node_t);
if (NULL == nodes[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* unpack the node object */
/* unpack the cellid */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->node), &n, ORTE_RAS_NODE))) {
&(nodes[i]->cell), &n, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the nodename */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->nodename), &n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the username */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->username), &n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the daemon's name */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->daemon), &n, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the oversubscribed flag */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->oversubscribed), &n, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -222,7 +237,7 @@ int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc);
return rc;
}
opal_list_append(&(nodes[i]->node_procs), &srcproc->super);
opal_list_append(&(nodes[i]->procs), &srcproc->super);
}
}
}

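Once a map has been unpacked (or fetched from the registry), consumers walk the per-node procs lists in the same way the routines in this commit do. A traversal sketch using only the types shown above; the helper itself is hypothetical:
/* Hypothetical sketch only: iterate every process in a job map, node by node */
#include "orte_config.h"
#include "opal/class/opal_list.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
static void walk_map(orte_job_map_t *map)
{
    opal_list_item_t *item, *item2;
    orte_mapped_node_t *node;
    orte_mapped_proc_t *proc;
    for (item = opal_list_get_first(&map->nodes);
         item != opal_list_get_end(&map->nodes);
         item = opal_list_get_next(item)) {
        node = (orte_mapped_node_t*)item;
        for (item2 = opal_list_get_first(&node->procs);
             item2 != opal_list_get_end(&node->procs);
             item2 = opal_list_get_next(item2)) {
            proc = (orte_mapped_proc_t*)item2;
            /* e.g., proc->rank and proc->app_idx identify the process on this node */
            (void)proc->rank;
            (void)proc->app_idx;
        }
    }
}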
@ -1,903 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/util/output.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/schema/schema.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/smr/smr_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"
/**
* orte_rmaps_base_node_t
*/
static void orte_rmaps_base_node_construct(orte_rmaps_base_node_t* node)
{
node->node = NULL;
OBJ_CONSTRUCT(&node->node_procs, opal_list_t);
}
static void orte_rmaps_base_node_destruct(orte_rmaps_base_node_t* node)
{
opal_list_item_t* item;
if(NULL != node->node) {
OBJ_RELEASE(node->node);
node->node = NULL;
}
while(NULL != (item = opal_list_remove_first(&node->node_procs))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&node->node_procs);
}
OBJ_CLASS_INSTANCE(
orte_rmaps_base_node_t,
opal_list_item_t,
orte_rmaps_base_node_construct,
orte_rmaps_base_node_destruct);
/**
* orte_rmaps_base_proc_t
*/
static void orte_rmaps_base_proc_construct(orte_rmaps_base_proc_t* proc)
{
proc->app = NULL;
proc->proc_node = NULL;
proc->pid = 0;
proc->local_pid = 0;
}
static void orte_rmaps_base_proc_destruct(orte_rmaps_base_proc_t* proc)
{
if (NULL != proc->app) {
free(proc->app);
proc->app = NULL;
}
}
OBJ_CLASS_INSTANCE(
orte_rmaps_base_proc_t,
opal_list_item_t,
orte_rmaps_base_proc_construct,
orte_rmaps_base_proc_destruct);
/**
* orte_rmaps_base_map_t
*/
static void orte_rmaps_base_map_construct(orte_rmaps_base_map_t* map)
{
map->app = NULL;
map->procs = NULL;
map->num_procs = 0;
OBJ_CONSTRUCT(&map->nodes, opal_list_t);
}
static void orte_rmaps_base_map_destruct(orte_rmaps_base_map_t* map)
{
orte_std_cntr_t i=0;
opal_list_item_t* item;
for(i=0; i<map->num_procs; i++) {
OBJ_RELEASE(map->procs[i]);
}
while(NULL != (item = opal_list_remove_first(&map->nodes)))
OBJ_RELEASE(item);
if(NULL != map->procs) {
free(map->procs);
map->procs = NULL;
}
if(NULL != map->app) {
OBJ_RELEASE(map->app);
map->app = NULL;
}
OBJ_DESTRUCT(&map->nodes);
}
OBJ_CLASS_INSTANCE(
orte_rmaps_base_map_t,
opal_list_item_t,
orte_rmaps_base_map_construct,
orte_rmaps_base_map_destruct);
/*
* Compare two proc entries
*/
static int orte_rmaps_value_compare(orte_gpr_value_t** val1, orte_gpr_value_t** val2)
{
orte_std_cntr_t i;
orte_std_cntr_t app1 = 0;
orte_std_cntr_t app2 = 0;
orte_std_cntr_t rank1 = 0;
orte_std_cntr_t rank2 = 0;
orte_std_cntr_t *sptr;
orte_gpr_value_t* value;
int rc;
for(i=0, value=*val1; i<value->cnt; i++) {
orte_gpr_keyval_t* keyval = value->keyvals[i];
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
}
rank1 = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
}
app1 = *sptr;
continue;
}
}
for(i=0, value=*val2; i<value->cnt; i++) {
orte_gpr_keyval_t* keyval = value->keyvals[i];
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
}
rank2 = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
}
app2 = *sptr;
continue;
}
}
if(app1 < app2)
return -1;
if(app1 > app2)
return +1;
if(rank1 < rank2)
return -1;
if(rank1 > rank2)
return +1;
return 0;
}
/**
* Obtain the mapping for this job, and the list of nodes confined to that mapping.
*
* Use this instead of orte_ras_base_node_query when past the RMAPS framework
 * since components like the PLS are only concerned with those nodes that they
 * have been mapped on, not all of the nodes allocated to their job. In the case
* where we are allocated 10 nodes from the RAS, but only map to 2 of them
* then we don't try to launch orteds on all 10 nodes, just the 2 mapped.
*/
int orte_rmaps_base_mapped_node_query(opal_list_t* mapping_list, opal_list_t* nodes_alloc, orte_jobid_t jobid)
{
opal_list_item_t *item_a, *item_m, *item_n;
int num_mapping = 0;
int rc = ORTE_SUCCESS;
bool matched = false;
/* get the mapping for this job */
rc = orte_rmaps_base_get_map(jobid, mapping_list);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
num_mapping = opal_list_get_size(mapping_list);
/* Create a list of nodes that are in the mapping */
for( item_m = opal_list_get_first(mapping_list);
item_m != opal_list_get_end(mapping_list);
item_m = opal_list_get_next(item_m)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item_m;
/* Iterate over all the nodes mapped and check them against the
* allocated node list */
for( item_n = opal_list_get_first(&(map->nodes));
item_n != opal_list_get_end(&(map->nodes));
item_n = opal_list_get_next(item_n)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)item_n;
matched = false;
/* If this node is in the list already, skip it */
if(num_mapping > 1) {
for( item_a = opal_list_get_first(nodes_alloc);
item_a != opal_list_get_end(nodes_alloc);
item_a = opal_list_get_next(item_a)) {
orte_ras_node_t* ras_node = (orte_ras_node_t*)item_a;
if( rmaps_node->node == ras_node) {
matched = true;
break;
}
}
if(matched) {
continue;
}
}
/* Otherwise
* - Add it to the allocated list of nodes
*/
OBJ_RETAIN(rmaps_node->node);
opal_list_append(nodes_alloc, &rmaps_node->node->super);
}
}
return rc;
}
/**
* Lookup node (if it exists) in the list. If it doesn't exist, create a new
* node and append to the table.
*/
static orte_rmaps_base_node_t*
orte_rmaps_lookup_node(opal_list_t* rmaps_nodes, opal_list_t* ras_nodes, char* node_name, orte_rmaps_base_proc_t* proc)
{
opal_list_item_t* item;
for(item = opal_list_get_first(rmaps_nodes);
item != opal_list_get_end(rmaps_nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_node_t* node = (orte_rmaps_base_node_t*)item;
if(strcmp(node->node->node_name, node_name) == 0) {
OBJ_RETAIN(proc);
opal_list_append(&node->node_procs, &proc->super);
return node;
}
}
for(item = opal_list_get_first(ras_nodes);
item != opal_list_get_end(ras_nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* ras_node = (orte_ras_node_t*)item;
if(strcmp(ras_node->node_name, node_name) == 0) {
orte_rmaps_base_node_t* node = OBJ_NEW(orte_rmaps_base_node_t);
OBJ_RETAIN(ras_node);
node->node = ras_node;
OBJ_RETAIN(proc);
opal_list_append(&node->node_procs, &proc->super);
opal_list_prepend(rmaps_nodes, &node->super);
return node;
}
}
return NULL;
}
/**
* Query the process mapping from the registry.
*/
int orte_rmaps_base_get_map(orte_jobid_t jobid, opal_list_t* mapping_list)
{
orte_app_context_t** app_context = NULL;
orte_rmaps_base_map_t** mapping = NULL;
opal_list_t nodes;
opal_list_item_t* item;
orte_std_cntr_t i, num_context = 0;
orte_std_cntr_t *sptr;
orte_process_name_t *pptr;
pid_t *pidptr;
char* segment = NULL;
orte_gpr_value_t** values;
orte_std_cntr_t v, num_values;
int rc;
char* keys[] = {
ORTE_PROC_RANK_KEY,
ORTE_PROC_NAME_KEY,
ORTE_PROC_APP_CONTEXT_KEY,
ORTE_PROC_PID_KEY,
ORTE_PROC_LOCAL_PID_KEY,
ORTE_NODE_NAME_KEY,
NULL
};
/* query the application context */
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &app_context, &num_context))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* query the node list */
OBJ_CONSTRUCT(&nodes, opal_list_t);
if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(&nodes,jobid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* build the mapping */
if(NULL == (mapping = (orte_rmaps_base_map_t**)malloc(sizeof(orte_rmaps_base_map_t*) * num_context))) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
for(i=0; i<num_context; i++) {
orte_rmaps_base_map_t* map = OBJ_NEW(orte_rmaps_base_map_t);
orte_app_context_t* app = app_context[i];
map->app = app;
if (0 < app->num_procs) {
map->procs = (orte_rmaps_base_proc_t**)malloc(sizeof(orte_rmaps_base_proc_t*) * app->num_procs);
if(NULL == map->procs) {
OBJ_RELEASE(map);
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
} else {
if (1 < num_context) { /** can't have multiple contexts if zero num_procs */
ORTE_ERROR_LOG(ORTE_ERR_INVALID_NUM_PROCS);
rc = ORTE_ERR_INVALID_NUM_PROCS;
goto cleanup;
}
}
map->num_procs = 0;
mapping[i] = map;
}
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* query the process list from the registry */
rc = orte_gpr.get(
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
segment,
NULL,
keys,
&num_values,
&values);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* sort the response */
qsort(values, num_values, sizeof(orte_gpr_value_t*),
(int (*)(const void*,const void*))orte_rmaps_value_compare);
/* build the proc list */
for(v=0; v<num_values; v++) {
orte_gpr_value_t* value = values[v];
orte_rmaps_base_map_t* map = NULL;
orte_rmaps_base_proc_t* proc;
char* node_name = NULL;
orte_std_cntr_t kv, app_index;
proc = OBJ_NEW(orte_rmaps_base_proc_t);
if(NULL == proc) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
for(kv = 0; kv<value->cnt; kv++) {
orte_gpr_keyval_t* keyval = value->keyvals[kv];
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->proc_rank = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->proc_name = *pptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
app_index = *sptr;
if(app_index >= num_context) {
rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
map = mapping[app_index];
proc->app = strdup(app_context[app_index]->app);
continue;
}
if (strcmp(keyval->key, ORTE_PROC_PID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->pid = *pidptr;
continue;
}
if (strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->local_pid = *pidptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
continue;
}
}
/* global record */
if(NULL == map) {
OBJ_RELEASE(proc);
continue;
}
/*
* This seems like a dummy check, but it ensures that we fail
* rather than overrun our array. This can happen if the
 * indices on the app schemas are incorrect
*/
if(map->num_procs < map->app->num_procs) {
map->procs[map->num_procs++] = proc;
proc->proc_node = orte_rmaps_lookup_node(&map->nodes, &nodes, node_name, proc);
}
else {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup;
}
}
/* cleanup any nodes allocated and not mapped */
while(NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
/* release temporary variables */
for(i=0; i<num_context; i++) {
opal_list_append(mapping_list, &mapping[i]->super);
}
free(segment);
free(app_context);
free(mapping);
return ORTE_SUCCESS;
cleanup:
if(NULL != segment)
free(segment);
if(NULL != app_context) {
for(i=0; i<num_context; i++) {
OBJ_RELEASE(app_context[i]);
}
free(app_context);
}
if(NULL != mapping) {
for(i=0; i<num_context; i++) {
if(NULL != mapping[i])
OBJ_RELEASE(mapping[i]);
}
free(mapping);
}
/* cleanup any nodes allocated and not mapped */
while(NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
return rc;
}
/**
* Query the process mapping for a specified node from the registry.
*/
int orte_rmaps_base_get_node_map(
orte_cellid_t cellid,
orte_jobid_t jobid,
const char* hostname,
opal_list_t* mapping_list)
{
orte_app_context_t** app_context = NULL;
orte_rmaps_base_map_t** mapping = NULL;
orte_ras_node_t *ras_node = NULL;
orte_gpr_keyval_t *condition;
orte_std_cntr_t i, num_context = 0;
orte_std_cntr_t *sptr;
pid_t *pidptr;
orte_process_name_t *pptr;
char* segment = NULL;
orte_gpr_value_t** values;
orte_std_cntr_t v, num_values;
int rc;
char* keys[] = {
ORTE_PROC_RANK_KEY,
ORTE_PROC_NAME_KEY,
ORTE_PROC_APP_CONTEXT_KEY,
ORTE_PROC_PID_KEY,
ORTE_PROC_LOCAL_PID_KEY,
ORTE_NODE_NAME_KEY,
NULL
};
/* allocate the node */
if(NULL == (ras_node = orte_ras.node_lookup(cellid,hostname))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
/* query the application context */
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &app_context, &num_context))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(NULL == (mapping = (orte_rmaps_base_map_t**)malloc(sizeof(orte_rmaps_base_map_t*) * num_context))) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
for(i=0; i<num_context; i++) {
orte_rmaps_base_map_t* map = OBJ_NEW(orte_rmaps_base_map_t);
orte_app_context_t* app = app_context[i];
OBJ_RETAIN(app);
map->app = app;
map->procs = (orte_rmaps_base_proc_t**)malloc(sizeof(orte_rmaps_base_proc_t*) * app->num_procs);
if(NULL == map->procs) {
OBJ_RELEASE(map);
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
map->num_procs = 0;
mapping[i] = map;
}
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* setup condition/filter for query - return only processes that
* are assigned to the specified node name
*/
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&condition, ORTE_NODE_NAME_KEY, ORTE_STRING, (void*)hostname))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* query the process list from the registry */
rc = orte_gpr.get_conditional(
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
segment,
NULL,
keys,
1,
&condition,
&num_values,
&values);
/* sort the response */
qsort(values, num_values, sizeof(orte_gpr_value_t*),
(int (*)(const void*,const void*))orte_rmaps_value_compare);
/* build the proc list */
for(v=0; v<num_values; v++) {
orte_gpr_value_t* value = values[v];
orte_rmaps_base_map_t* map = NULL;
orte_rmaps_base_node_t *node = NULL;
orte_rmaps_base_proc_t* proc;
char* node_name = NULL;
orte_std_cntr_t kv, app_index;
proc = OBJ_NEW(orte_rmaps_base_proc_t);
if(NULL == proc) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
for(kv = 0; kv<value->cnt; kv++) {
orte_gpr_keyval_t* keyval = value->keyvals[kv];
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->proc_rank = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->proc_name = *pptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
app_index = *sptr;
if(app_index >= num_context) {
rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
map = mapping[app_index];
if(opal_list_get_size(&map->nodes) == 0) {
node = OBJ_NEW(orte_rmaps_base_node_t);
if(NULL == node) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup;
}
OBJ_RETAIN(ras_node);
node->node = ras_node;
opal_list_append(&map->nodes, &node->super);
} else {
node = (orte_rmaps_base_node_t*)opal_list_get_first(&map->nodes);
}
proc->app = strdup(app_context[app_index]->app);
continue;
}
if (strcmp(keyval->key, ORTE_PROC_PID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->pid = *pidptr;
continue;
}
if (strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->local_pid = *pidptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
continue;
}
}
/* skip this entry? */
if(NULL == map ||
proc->proc_name.cellid != cellid) {
OBJ_RELEASE(proc);
continue;
}
map->procs[map->num_procs++] = proc;
OBJ_RETAIN(proc);
opal_list_append(&node->node_procs, &proc->super);
proc->proc_node = node;
}
/* return mapping for the entries that have procs on this node */
for(i=0; i<num_context; i++) {
orte_rmaps_base_map_t* map = mapping[i];
if(map->num_procs) {
opal_list_append(mapping_list, &map->super);
} else {
OBJ_RELEASE(map);
}
}
/* decrement reference count on node */
OBJ_RELEASE(ras_node);
/* release all app context - note the reference count was bumped
* if saved in the map
*/
for(i=0; i<num_context; i++) {
OBJ_RELEASE(app_context[i]);
}
free(segment);
free(app_context);
free(mapping);
OBJ_RELEASE(condition);
return ORTE_SUCCESS;
cleanup:
if(NULL != segment)
free(segment);
if(NULL != app_context) {
for(i=0; i<num_context; i++) {
OBJ_RELEASE(app_context[i]);
}
free(app_context);
}
if(NULL != mapping) {
for(i=0; i<num_context; i++) {
if(NULL != mapping[i])
OBJ_RELEASE(mapping[i]);
}
free(mapping);
}
if (NULL != condition)
OBJ_RELEASE(condition);
return rc;
}
/**
* Set the process mapping in the registry.
*/
int orte_rmaps_base_set_map(orte_jobid_t jobid, opal_list_t* mapping_list)
{
orte_std_cntr_t i, j;
orte_std_cntr_t index=0;
orte_std_cntr_t num_procs = 0;
int rc = ORTE_SUCCESS;
opal_list_item_t* item;
orte_gpr_value_t** values;
char *segment;
for(item = opal_list_get_first(mapping_list);
item != opal_list_get_end(mapping_list);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item;
num_procs += map->num_procs;
}
if(num_procs == 0) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
/**
* allocate value array. We need to reserve one extra spot so we can set the counter
* for the process INIT state to indicate that all procs are at that state. This will
* allow the INIT trigger to fire.
*/
values = (orte_gpr_value_t**)malloc((1+num_procs) * sizeof(orte_gpr_value_t*));
if(NULL == values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment,jobid))) {
ORTE_ERROR_LOG(rc);
free(values);
return rc;
}
/** setup the last value in the array to update the INIT counter */
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[num_procs]),
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
segment, 1, 1))) {
ORTE_ERROR_LOG(rc);
free(values);
free(segment);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[num_procs]->keyvals[0]), ORTE_PROC_NUM_AT_INIT, ORTE_STD_CNTR, &num_procs))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
values[num_procs]->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* counter is in the job's globals container */
for(i=0; i<num_procs; i++) {
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
segment, 7, 0))) {
ORTE_ERROR_LOG(rc);
for(j=0; j<i; j++) {
OBJ_RELEASE(values[j]);
}
free(values);
free(segment);
return rc;
}
}
/* iterate through all processes and initialize value array */
for(item = opal_list_get_first(mapping_list);
item != opal_list_get_end(mapping_list);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item;
orte_std_cntr_t p;
for(p=0; p<map->num_procs; p++) {
orte_rmaps_base_proc_t* proc = map->procs[p];
orte_gpr_value_t* value = values[index++];
orte_proc_state_t proc_state=ORTE_PROC_STATE_INIT;
/* initialize keyvals */
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_PROC_RANK_KEY, ORTE_STD_CNTR, &(proc->proc_rank)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_PROC_NAME_KEY, ORTE_NAME, &(proc->proc_name)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_NODE_NAME_KEY, ORTE_STRING, proc->proc_node->node->node_name))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &(map->app->idx)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[5]), ORTE_PROC_PID_KEY, ORTE_PID, &(proc->pid)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[6]), ORTE_PROC_LOCAL_PID_KEY, ORTE_PID, &(proc->local_pid)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* set the tokens */
if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(value->tokens), &(value->num_tokens), &(proc->proc_name)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
}
}
/* insert all values in one call */
if (ORTE_SUCCESS != (rc = orte_gpr.put((1+num_procs), values))) {
ORTE_ERROR_LOG(rc);
}
cleanup:
for(i=0; i<num_procs; i++) {
if(NULL != values[i]) {
OBJ_RELEASE(values[i]);
}
}
if(NULL != values)
free(values);
return rc;
}

@ -42,7 +42,7 @@ static orte_rmaps_base_module_t *select_any(void);
* Function for selecting one component from all those that are
* available.
*/
int orte_rmaps_base_map(orte_jobid_t job, char *desired_mapper)
int orte_rmaps_base_map_job(orte_jobid_t job, char *desired_mapper)
{
orte_rmaps_base_module_t *module=NULL;
int rc;

@ -1,35 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/class/opal_list.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
int orte_rmaps_base_map_no_op(orte_jobid_t job, char *desired_mapper)
{
return ORTE_ERR_NOT_SUPPORTED;
}

@ -50,14 +50,17 @@ orte_rmaps_base_t orte_rmaps_base;
* Declare the RMAPS module to hold the API function pointers
*/
orte_rmaps_base_module_t orte_rmaps = {
orte_rmaps_base_map,
orte_rmaps_base_map_job,
orte_rmaps_base_get_job_map,
orte_rmaps_base_get_node_map,
orte_rmaps_base_finalize
};
orte_rmaps_base_module_t orte_rmaps_no_op = {
orte_rmaps_base_map_no_op,
orte_rmaps_base_finalize
};
/*
* Include all the RMAPS class instance declarations
*/
#include "orte/mca/rmaps/base/rmaps_class_instances.h"
/**
* Function for finding and opening either all MCA components, or the one
@ -66,7 +69,7 @@ orte_rmaps_base_module_t orte_rmaps_no_op = {
int orte_rmaps_base_open(void)
{
int param, rc, value;
char *policy, *requested;
char *policy;
orte_data_type_t tmp;
/* Debugging / verbose output */
@ -150,30 +153,7 @@ int orte_rmaps_base_open(void)
}
/* Some systems do not want any RMAPS support. In those cases,
* memory consumption is also an issue. For those systems, we
* avoid opening the RMAPS components by checking for a directive
* to use the "null" component.
*/
param = mca_base_param_reg_string_name("rmaps", NULL, NULL,
false, false, NULL, NULL);
if (ORTE_ERROR == mca_base_param_lookup_string(param, &requested)) {
return ORTE_ERROR;
}
if (NULL != requested && 0 == strcmp(requested, "null")) {
/* the user has specifically requested that we use the "null"
* component. In this case, that means we do NOT open any
* components, and we simply use the default module we have
* already defined above
*/
orte_rmaps_base.no_op_selected = true;
orte_rmaps = orte_rmaps_no_op; /* use the no_op module */
return ORTE_SUCCESS;
}
orte_rmaps_base.no_op_selected = false;
/* Open up all the components that we can find */
if (ORTE_SUCCESS !=
mca_base_components_open("rmaps", orte_rmaps_base.rmaps_output,
mca_rmaps_base_static_components,

orte/mca/rmaps/base/rmaps_base_registry_fns.c (new file, 413 lines)
@ -0,0 +1,413 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/util/output.h"
#include "opal/util/trace.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/schema/schema.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/smr/smr_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"
/*
* Query the process mapping from the registry.
*/
int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
{
orte_job_map_t *mapping;
orte_mapped_proc_t *proc;
orte_cellid_t *cellptr, cell;
orte_std_cntr_t *sptr;
bool *bptr, oversub;
pid_t *pidptr;
orte_process_name_t *pptr;
char *segment;
char *node_name;
char *username;
orte_gpr_value_t **values, *value;
orte_gpr_keyval_t* keyval;
orte_std_cntr_t v, kv, num_values;
int rc;
char* keys[] = {
ORTE_PROC_RANK_KEY,
ORTE_PROC_NAME_KEY,
ORTE_PROC_APP_CONTEXT_KEY,
ORTE_PROC_LOCAL_PID_KEY,
ORTE_CELLID_KEY,
ORTE_NODE_NAME_KEY,
ORTE_NODE_USERNAME_KEY,
ORTE_NODE_OVERSUBSCRIBED_KEY,
NULL
};
OPAL_TRACE(1);
/* define default answer */
*map = NULL;
/* create the object */
mapping = OBJ_NEW(orte_job_map_t);
if (NULL == mapping) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* store the jobid */
mapping->job = jobid;
/* get the job segment name */
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(mapping);
return rc;
}
/* query the application context */
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &(mapping->apps), &(mapping->num_apps)))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* query the process list from the registry */
rc = orte_gpr.get(
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
segment,
NULL,
keys,
&num_values,
&values);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(mapping);
free(segment);
return rc;
}
free(segment);
/* build the node and proc lists. each value corresponds
* to a process in the map
*/
for(v=0; v<num_values; v++) {
value = values[v];
node_name = NULL;
proc = OBJ_NEW(orte_mapped_proc_t);
if(NULL == proc) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
for(kv = 0; kv<value->cnt; kv++) {
keyval = value->keyvals[kv];
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->rank = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->name = *pptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->app_idx = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->pid = *pidptr;
continue;
}
if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cellptr, keyval->value, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
cell = *cellptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_USERNAME_KEY) == 0) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&username, keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_OVERSUBSCRIBED_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, keyval->value, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
oversub = *bptr;
continue;
}
}
/* store this process in the map */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(mapping, cell, node_name, username, oversub, proc))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (NULL != node_name) free(node_name);
}
/* all done */
*map = mapping;
return ORTE_SUCCESS;
cleanup:
OBJ_RELEASE(mapping);
for (v=0; v < num_values; v++) {
OBJ_RELEASE(values[v]);
}
if (NULL != values) free(values);
return rc;
}
int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell,
char *nodename, orte_jobid_t job)
{
orte_job_map_t *map;
opal_list_item_t *item;
orte_mapped_node_t *nptr;
int rc;
/* set default answer */
*node = NULL;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_job_map(&map, job))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* scan the map for the indicated node */
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
nptr = (orte_mapped_node_t*)item;
if (cell == nptr->cell && 0 == strcmp(nodename, nptr->nodename)) {
*node = nptr;
/* protect the node object from release when we get rid
* of the map object
*/
opal_list_remove_item(&map->nodes, item);
OBJ_RELEASE(map);
return ORTE_SUCCESS;
}
}
/* if we get here, then the node wasn't found */
OBJ_RELEASE(map);
return ORTE_ERR_NOT_FOUND;
}
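A caller-side sketch for the node lookup above; the cell and nodename values and the helper itself are illustrative assumptions, not part of this commit:
/* Hypothetical sketch only: report how many procs were mapped onto one host */
static int count_procs_on_node(orte_cellid_t cell, char *nodename,
                               orte_jobid_t job, orte_std_cntr_t *num_procs)
{
    orte_mapped_node_t *node;
    int rc;
    *num_procs = 0;
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_node_map(&node, cell, nodename, job))) {
        return rc;    /* e.g. ORTE_ERR_NOT_FOUND if nothing was mapped there */
    }
    *num_procs = (orte_std_cntr_t)opal_list_get_size(&node->procs);
    /* get_node_map detached the node from its map, so the caller owns it now */
    OBJ_RELEASE(node);
    return ORTE_SUCCESS;
}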
/**
* Set the process mapping in the registry.
*/
int orte_rmaps_base_put_job_map(orte_job_map_t *map)
{
orte_std_cntr_t i, j;
orte_std_cntr_t index=0;
orte_std_cntr_t num_procs = 0;
int rc = ORTE_SUCCESS;
opal_list_item_t *item, *item2;
orte_gpr_value_t **values, *value;
char *segment;
orte_mapped_node_t *node;
orte_mapped_proc_t *proc;
orte_proc_state_t proc_state=ORTE_PROC_STATE_INIT;
OPAL_TRACE(2);
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
node = (orte_mapped_node_t*)item;
num_procs += opal_list_get_size(&node->procs);
}
if(num_procs == 0) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
/**
* allocate value array. We need to reserve one extra spot so we can set the counter
* for the process INIT state to indicate that all procs are at that state. This will
* allow the INIT trigger to fire.
*/
values = (orte_gpr_value_t**)malloc((1+num_procs) * sizeof(orte_gpr_value_t*));
if(NULL == values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, map->job))) {
ORTE_ERROR_LOG(rc);
free(values);
return rc;
}
/** setup the last value in the array to update the INIT counter */
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[num_procs]),
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
segment, 1, 1))) {
ORTE_ERROR_LOG(rc);
free(values);
free(segment);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[num_procs]->keyvals[0]), ORTE_PROC_NUM_AT_INIT, ORTE_STD_CNTR, &num_procs))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
values[num_procs]->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* counter is in the job's globals container */
for(i=0; i<num_procs; i++) {
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
segment, 8, 0))) {
ORTE_ERROR_LOG(rc);
for(j=0; j<i; j++) {
OBJ_RELEASE(values[j]);
}
free(values);
free(segment);
return rc;
}
}
/* iterate through all processes and initialize value array */
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
node = (orte_mapped_node_t*)item;
for (item2 = opal_list_get_first(&node->procs);
item2 != opal_list_get_end(&node->procs);
item2 = opal_list_get_next(item2)) {
proc = (orte_mapped_proc_t*)item2;
value = values[index++];
/* initialize keyvals */
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_PROC_RANK_KEY, ORTE_STD_CNTR, &(proc->rank)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_PROC_NAME_KEY, ORTE_NAME, &(proc->name)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_CELLID_KEY, ORTE_CELLID, &(node->cell)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), ORTE_NODE_NAME_KEY, ORTE_STRING, node->nodename))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), ORTE_NODE_USERNAME_KEY, ORTE_STRING, node->username))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[5]), ORTE_NODE_OVERSUBSCRIBED_KEY, ORTE_BOOL, &(node->oversubscribed)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[6]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &(proc->app_idx)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[7]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* set the tokens */
if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(value->tokens), &(value->num_tokens), &(proc->name)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
}
}
/* insert all values in one call */
if (ORTE_SUCCESS != (rc = orte_gpr.put((1+num_procs), values))) {
ORTE_ERROR_LOG(rc);
}
cleanup:
for(i=0; i<num_procs; i++) {
if(NULL != values[i]) {
OBJ_RELEASE(values[i]);
}
}
if(NULL != values)
free(values);
return rc;
}
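Taken together with orte_rmaps_base_get_job_map above, the registry write here gives a simple store/reload cycle. A sketch under the assumption that the map was fully populated by a mapper component; the helper is hypothetical:
/* Hypothetical sketch only: write a finished map to the registry, then reload it */
static int store_and_reload(orte_job_map_t *map)
{
    orte_job_map_t *reloaded;
    int rc;
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_put_job_map(map))) {
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_job_map(&reloaded, map->job))) {
        return rc;
    }
    /* ... walk reloaded->nodes and each node's procs list as needed ... */
    OBJ_RELEASE(reloaded);
    return ORTE_SUCCESS;
}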

@ -48,6 +48,7 @@ static bool are_all_mapped_valid(char **mapping,
opal_list_t* nodes)
{
opal_list_item_t *item;
orte_ras_node_t *node;
int i;
bool matched;
@ -57,7 +58,8 @@ static bool are_all_mapped_valid(char **mapping,
for(item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes);
item = opal_list_get_next(item) ) {
if( 0 == strcmp( ((orte_ras_node_t*) item)->node_name, mapping[i]) ) {
node = (orte_ras_node_t*) item;
if( 0 == strcmp(node->node_name, mapping[i]) ) {
matched = true;
break;
}
@ -94,7 +96,7 @@ static bool is_mapped(opal_list_item_t *item,
/*
* Query the registry for all nodes allocated to a specified job
*/
int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots)
int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots)
{
opal_list_item_t *item, *next;
orte_ras_node_t *node;
@ -104,7 +106,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
/** set default answer */
*total_num_slots = 0;
if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(nodes, jobid))) {
/* get the allocation for this job */
if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(allocated_nodes, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -115,21 +118,21 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
id = mca_base_param_find("rmaps", NULL, "base_schedule_local");
mca_base_param_lookup_int(id, &nolocal);
if (0 == nolocal) {
for (item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes);
for (item = opal_list_get_first(allocated_nodes);
item != opal_list_get_end(allocated_nodes);
item = opal_list_get_next(item) ) {
if (0 == strcmp(((orte_ras_node_t *) item)->node_name,
orte_system_info.nodename) ||
opal_ifislocal(((orte_ras_node_t *) item)->node_name)) {
opal_list_remove_item(nodes, item);
node = (orte_ras_node_t*)item;
if (0 == strcmp(node->node_name, orte_system_info.nodename) ||
opal_ifislocal(node->node_name)) {
opal_list_remove_item(allocated_nodes, item);
break;
}
}
}
/** remove all nodes that are already at max usage */
item = opal_list_get_first(nodes);
while (item != opal_list_get_end(nodes)) {
item = opal_list_get_first(allocated_nodes);
while (item != opal_list_get_end(allocated_nodes)) {
/** save the next pointer in case we remove this node */
next = opal_list_get_next(item);
@ -137,8 +140,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
/** check to see if this node is fully used - remove if so */
node = (orte_ras_node_t*)item;
if (0 != node->node_slots_max && node->node_slots_inuse > node->node_slots_max) {
opal_list_remove_item(nodes, item);
} else { /** otherwise, add its slots to the total */
opal_list_remove_item(allocated_nodes, item);
} else { /** otherwise, add the slots for our job to the total */
num_slots += node->node_slots;
}
@ -146,8 +149,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
item = next;
}
/* Sanity check to make sure we have been allocated nodes */
if (0 == opal_list_get_size(nodes)) {
/* Sanity check to make sure we have resources available */
if (0 == opal_list_get_size(allocated_nodes)) {
ORTE_ERROR_LOG(ORTE_ERR_TEMP_OUT_OF_RESOURCE);
return ORTE_ERR_TEMP_OUT_OF_RESOURCE;
}
@ -245,67 +248,108 @@ int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
}
/*
* Claim a slot for a specified job on a node
*/
int orte_rmaps_base_claim_slot(orte_rmaps_base_map_t *map,
orte_ras_node_t *current_node,
orte_jobid_t jobid, orte_vpid_t vpid,
int proc_index,
opal_list_t *nodes,
opal_list_t *fully_used_nodes)
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename,
char *username, bool oversubscribed, orte_mapped_proc_t *proc)
{
orte_rmaps_base_proc_t *proc;
orte_process_name_t *proc_name;
orte_rmaps_base_node_t *rmaps_node;
int rc;
opal_list_item_t *item;
orte_mapped_node_t *node;
/* create objects */
rmaps_node = OBJ_NEW(orte_rmaps_base_node_t);
if (NULL == rmaps_node) {
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
node = (orte_mapped_node_t*)item;
if (cell == node->cell && 0 == strcmp(nodename, node->nodename)) {
/* node was found - add this proc to that list */
opal_list_append(&node->procs, &proc->super);
/* set the oversubscribed flag */
node->oversubscribed = oversubscribed;
return ORTE_SUCCESS;
}
}
/* node was NOT found - add this one to the list */
node = OBJ_NEW(orte_mapped_node_t);
if (NULL == node) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
OBJ_RETAIN(current_node);
rmaps_node->node = current_node;
proc = OBJ_NEW(orte_rmaps_base_proc_t);
node->cell = cell;
node->nodename = strdup(nodename);
if (NULL != username) {
node->username = strdup(username);
}
node->oversubscribed = oversubscribed;
opal_list_append(&map->nodes, &node->super);
/* and add this proc to the new node's list of procs */
opal_list_append(&node->procs, &proc->super);
return ORTE_SUCCESS;
}
/*
* Claim a slot for a specified job on a node
*/
int orte_rmaps_base_claim_slot(orte_job_map_t *map,
orte_ras_node_t *current_node,
orte_jobid_t jobid, orte_vpid_t vpid,
orte_std_cntr_t app_idx,
opal_list_t *nodes,
opal_list_t *fully_used_nodes)
{
orte_process_name_t *name;
orte_mapped_proc_t *proc;
bool oversub;
int rc;
/* create mapped_proc object */
proc = OBJ_NEW(orte_mapped_proc_t);
if (NULL == proc) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(rmaps_node);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* create the process name as an offset from the vpid-start */
rc = orte_ns.create_process_name(&proc_name, current_node->node_cellid,
rc = orte_ns.create_process_name(&name, current_node->node_cellid,
jobid, vpid);
if (rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(proc);
OBJ_RELEASE(rmaps_node);
return rc;
}
proc->proc_node = rmaps_node;
proc->proc_name = *proc_name;
proc->proc_rank = vpid;
orte_ns.free_name(&proc_name);
OBJ_RETAIN(proc); /* bump reference count for the node */
opal_list_append(&rmaps_node->node_procs, &proc->super);
map->procs[proc_index] = proc;
/* Save this node on the map */
opal_list_append(&map->nodes, &rmaps_node->super);
proc->name = *name;
proc->rank = vpid;
proc->app_idx = app_idx;
/* Be sure to demarcate this slot as claimed for the node */
current_node->node_slots_inuse++;
/* see if this node is oversubscribed now */
if (current_node->node_slots_inuse >= current_node->node_slots) {
oversub = true;
} else {
oversub = false;
}
/* add the proc to the map */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(map, current_node->node_cellid,
current_node->node_name,
current_node->node_username,
oversub, proc))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(proc);
return rc;
}
/* Remove this node if it has reached its max number of allocatable slots OR it has
* reached the soft limit AND we are in a "no oversubscribe" state
*/
if ((0 != current_node->node_slots_max &&
current_node->node_slots_inuse >= current_node->node_slots_max) ||
(!orte_rmaps_base.oversubscribe &&
current_node->node_slots_inuse >= current_node->node_slots)) {
(!orte_rmaps_base.oversubscribe && oversub)) {
opal_list_remove_item(nodes, (opal_list_item_t*)current_node);
/* add it to the list of fully used nodes */
opal_list_append(fully_used_nodes, &current_node->super);
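For orientation, here is a minimal sketch (not part of this commit; the function and variable names are illustrative) of how a mapper might drive the new claim_slot interface for one app_context, assuming the job map, the allocated node list, and a reserved vpid range have already been set up by the caller:

/* Illustrative sketch -- assumes the ORTE headers touched by this commit */
#include "orte/orte_constants.h"
#include "opal/class/opal_list.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmgr/rmgr_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"

static int demo_map_one_app(orte_job_map_t *map, orte_app_context_t *app,
                            orte_jobid_t jobid, orte_vpid_t vpid_start,
                            opal_list_t *node_list, opal_list_t *fully_used_nodes)
{
    orte_std_cntr_t num_alloc;
    orte_ras_node_t *node;
    int rc;

    for (num_alloc = 0; num_alloc < app->num_procs; num_alloc++) {
        if (0 == opal_list_get_size(node_list)) {
            /* every node hit its limit before all procs were placed */
            ORTE_ERROR_LOG(ORTE_ERR_TEMP_OUT_OF_RESOURCE);
            return ORTE_ERR_TEMP_OUT_OF_RESOURCE;
        }
        /* take the node at the head of the list; claim_slot parks it on
         * fully_used_nodes once it cannot accept any more processes */
        node = (orte_ras_node_t*)opal_list_get_first(node_list);

        rc = orte_rmaps_base_claim_slot(map, node, jobid,
                                        vpid_start + num_alloc, app->idx,
                                        node_list, fully_used_nodes);
        if (ORTE_SUCCESS != rc && ORTE_ERR_NODE_FULLY_USED != rc) {
            /* NODE_FULLY_USED is informational (mirroring how the round-robin
             * mapper treats it); anything else is a real error */
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }
    return ORTE_SUCCESS;
}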

142
orte/mca/rmaps/base/rmaps_class_instances.h Normal file
View file

@ -0,0 +1,142 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*/
#ifndef ORTE_MCA_RMAPS_CLASS_INST_H
#define ORTE_MCA_RMAPS_CLASS_INST_H
/*
* includes
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/rmaps.h"
/*
* Functions for use solely within the RMAPS framework
*/
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
* orte_mapped_proc_t
*/
static void orte_rmaps_mapped_proc_construct(orte_mapped_proc_t* proc)
{
proc->name.cellid = ORTE_CELLID_INVALID;
proc->name.jobid = ORTE_JOBID_INVALID;
proc->name.vpid = ORTE_VPID_INVALID;
proc->rank = 0;
proc->app_idx = 0;
proc->pid = 0;
}
OBJ_CLASS_INSTANCE(orte_mapped_proc_t,
opal_list_item_t,
orte_rmaps_mapped_proc_construct, NULL);
/*
* orte_mapped_node_t
*/
static void orte_rmaps_mapped_node_construct(orte_mapped_node_t* node)
{
node->nodename = NULL;
node->username = NULL;
node->daemon = NULL;
node->oversubscribed = false;
OBJ_CONSTRUCT(&node->procs, opal_list_t);
}
static void orte_rmaps_mapped_node_destruct(orte_mapped_node_t* node)
{
opal_list_item_t* item;
if (NULL != node->nodename) {
free(node->nodename);
}
if (NULL != node->username) {
free(node->username);
}
if (NULL != node->daemon) {
free(node->daemon);
}
while (NULL != (item = opal_list_remove_first(&node->procs))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&node->procs);
}
OBJ_CLASS_INSTANCE(orte_mapped_node_t,
opal_list_item_t,
orte_rmaps_mapped_node_construct,
orte_rmaps_mapped_node_destruct);
/*
* orte_job_map_t
*/
static void orte_rmaps_job_map_construct(orte_job_map_t* map)
{
map->job = ORTE_JOBID_INVALID;
map->num_apps = 0;
map->apps = NULL;
OBJ_CONSTRUCT(&map->nodes, opal_list_t);
}
static void orte_rmaps_job_map_destruct(orte_job_map_t* map)
{
orte_std_cntr_t i=0;
opal_list_item_t* item;
for(i=0; i < map->num_apps; i++) {
if (NULL != map->apps[i]) OBJ_RELEASE(map->apps[i]);
}
if (NULL != map->apps) {
free(map->apps);
}
while (NULL != (item = opal_list_remove_first(&map->nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&map->nodes);
}
OBJ_CLASS_INSTANCE(orte_job_map_t,
opal_list_item_t,
orte_rmaps_job_map_construct,
orte_rmaps_job_map_destruct);
/*
* external API functions will be documented in the mca/rmaps/rmaps.h file
*/
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif
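To make the lifecycle these class instances implement concrete, a small hand-built example follows (not part of the commit; the node name and cell are made up). Releasing the top-level map is enough to free the nodes, the per-node proc lists, and the duplicated strings, because the destructors above cascade through the embedded lists:

/* Illustrative sketch -- builds a one-node, one-proc map and releases it */
#include <string.h>
#include "orte/orte_constants.h"
#include "opal/class/opal_list.h"
#include "orte/mca/rmaps/rmaps.h"

static int demo_build_tiny_map(orte_jobid_t jobid)
{
    orte_job_map_t *map;
    orte_mapped_node_t *node;
    orte_mapped_proc_t *proc;

    map = OBJ_NEW(orte_job_map_t);       /* constructor sets job to ORTE_JOBID_INVALID */
    if (NULL == map) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    map->job = jobid;

    node = OBJ_NEW(orte_mapped_node_t);
    if (NULL == node) {
        OBJ_RELEASE(map);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    node->cell = 0;                       /* hypothetical cell id */
    node->nodename = strdup("node0");     /* hypothetical node name */
    opal_list_append(&map->nodes, &node->super);

    proc = OBJ_NEW(orte_mapped_proc_t);
    if (NULL == proc) {
        OBJ_RELEASE(map);                 /* map destructor releases the node as well */
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    proc->name.cellid = 0;
    proc->name.jobid  = jobid;
    proc->name.vpid   = 0;
    proc->rank = 0;
    opal_list_append(&node->procs, &proc->super);

    /* releasing the map cascades through the node and proc lists */
    OBJ_RELEASE(map);
    return ORTE_SUCCESS;
}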

View file

@ -30,6 +30,7 @@
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/rmaps.h"
@ -67,15 +68,47 @@ OBJ_CLASS_DECLARATION(orte_rmaps_base_cmp_t);
/*
* Base functions
* Base API functions
*/
int orte_rmaps_base_map(orte_jobid_t job, char *desired_mapper);
/*
* NO_OP functions
* Map a job
 * All calls to rmaps.map_job are routed through this function. This allows callers of
 * the RMAPS framework to specify the particular mapper they wish to use.
*/
int orte_rmaps_base_map_no_op(orte_jobid_t job, char *desired_mapper);
int orte_rmaps_base_map_job(orte_jobid_t job, char *desired_mapper);
/*
* Get job map
* Retrieve the information for a job map from the registry and reassemble it into
 * a job_map object. Memory for the job_map object and all of its elements is
 * allocated by the function.
*/
ORTE_DECLSPEC int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t job);
/*
* Get node map
* Retrieve the information for a job map from the registry and provide the info
* for the specified node
*/
ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell,
char *nodename, orte_jobid_t job);
/*
* Registry functions for maps
*/
/*
* Put job map
* Given a pointer to an orte_job_map_t, place the map's information on
* the registry. Info is entered into the containers for each individual process on
* the job's segment. Additionally, the function sets the INIT counter to the number
* of processes in the map, thus causing the INIT trigger to fire so that any
* attached subscriptions can be serviced.
*/
ORTE_DECLSPEC int orte_rmaps_base_put_job_map(orte_job_map_t *map);
/*
* communication functions
@ -89,55 +122,60 @@ void orte_rmaps_base_recv(int status, orte_process_name_t* sender,
/*
* Internal support functions
*/
ORTE_DECLSPEC int orte_rmaps_base_mapped_node_query(opal_list_t* mapping_list, opal_list_t* nodes_alloc, orte_jobid_t jobid);
ORTE_DECLSPEC int orte_rmaps_base_get_map(orte_jobid_t, opal_list_t* mapping);
ORTE_DECLSPEC int orte_rmaps_base_set_map(orte_jobid_t, opal_list_t* mapping);
ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_cellid_t, orte_jobid_t, const char*, opal_list_t* mapping);
/*
* Function to add a mapped_proc entry to a map
* Scans list of nodes on map to see if the specified one already
* exists - if so, just add this entry to that node's list of
* procs. If not, then add new node entry and put this proc
* on its list.
*/
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename,
char *username, bool oversubscribed, orte_mapped_proc_t *proc);
ORTE_DECLSPEC int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots);
ORTE_DECLSPEC int orte_rmaps_base_update_node_usage(opal_list_t *nodes);
ORTE_DECLSPEC int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
orte_app_context_t *app,
opal_list_t *master_node_list,
orte_std_cntr_t *total_num_slots);
int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots);
int orte_rmaps_base_update_node_usage(opal_list_t *nodes);
int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
orte_app_context_t *app,
opal_list_t *master_node_list,
orte_std_cntr_t *total_num_slots);
ORTE_DECLSPEC int orte_rmaps_base_claim_slot(orte_rmaps_base_map_t *map,
orte_ras_node_t *current_node,
orte_jobid_t jobid, orte_vpid_t vpid,
int proc_index,
opal_list_t *nodes,
opal_list_t *fully_used_nodes);
int orte_rmaps_base_claim_slot(orte_job_map_t *map,
orte_ras_node_t *current_node,
orte_jobid_t jobid, orte_vpid_t vpid,
orte_std_cntr_t app_idx,
opal_list_t *nodes,
opal_list_t *fully_used_nodes);
/** Local data type functions */
void orte_rmaps_base_std_obj_release(orte_data_value_t *value);
/* JOB_MAP */
int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_map(orte_rmaps_base_map_t *value1, orte_rmaps_base_map_t *value2, orte_data_type_t type);
int orte_rmaps_base_copy_map(orte_job_map_t **dest, orte_job_map_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_map(orte_job_map_t *value1, orte_job_map_t *value2, orte_data_type_t type);
int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type);
int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t *src, orte_data_type_t type);
int orte_rmaps_base_size_map(size_t *size, orte_rmaps_base_map_t *src, orte_data_type_t type);
int orte_rmaps_base_print_map(char **output, char *prefix, orte_job_map_t *src, orte_data_type_t type);
int orte_rmaps_base_size_map(size_t *size, orte_job_map_t *src, orte_data_type_t type);
int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type);
/* MAPPED_PROC */
int orte_rmaps_base_copy_mapped_proc(orte_rmaps_base_proc_t **dest, orte_rmaps_base_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_mapped_proc(orte_rmaps_base_proc_t *value1, orte_rmaps_base_proc_t *value2, orte_data_type_t type);
int orte_rmaps_base_copy_mapped_proc(orte_mapped_proc_t **dest, orte_mapped_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_mapped_proc(orte_mapped_proc_t *value1, orte_mapped_proc_t *value2, orte_data_type_t type);
int orte_rmaps_base_pack_mapped_proc(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type);
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_base_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_rmaps_base_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_mapped_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_mapped_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type);
/* MAPPED_NODE */
int orte_rmaps_base_copy_mapped_node(orte_rmaps_base_node_t **dest, orte_rmaps_base_node_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_mapped_node(orte_rmaps_base_node_t *value1, orte_rmaps_base_node_t *value2, orte_data_type_t type);
int orte_rmaps_base_copy_mapped_node(orte_mapped_node_t **dest, orte_mapped_node_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_mapped_node(orte_mapped_node_t *value1, orte_mapped_node_t *value2, orte_data_type_t type);
int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type);
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_base_node_t *src, orte_data_type_t type);
int orte_rmaps_base_size_mapped_node(size_t *size, orte_rmaps_base_node_t *src, orte_data_type_t type);
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_node_t *src, orte_data_type_t type);
int orte_rmaps_base_size_mapped_node(size_t *size, orte_mapped_node_t *src, orte_data_type_t type);
int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type);
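Taken together, put_job_map and get_job_map declared above give a registry round trip for the new orte_job_map_t. The following is a hedged sketch of that round trip (not part of the commit; it assumes ORTE is already initialized, e.g. on the HNP, and that `map` was produced by a mapper):

/* Illustrative sketch -- store a map on the registry and read it back */
#include "orte/orte_constants.h"
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/base/rmaps_private.h"

static int demo_store_and_reload(orte_job_map_t *map, orte_jobid_t jobid)
{
    orte_job_map_t *reloaded;
    opal_list_item_t *item;
    int rc;

    /* write the map onto the job's segment (this also sets the INIT counter) */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_put_job_map(map))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* reassemble a fresh copy from the registry */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_job_map(&reloaded, jobid))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* the caller owns the reloaded map and everything hanging off it */
    for (item = opal_list_get_first(&reloaded->nodes);
         item != opal_list_get_end(&reloaded->nodes);
         item = opal_list_get_next(item)) {
        orte_mapped_node_t *node = (orte_mapped_node_t*)item;
        opal_output(0, "job %ld mapped onto node %s",
                    (long)reloaded->job, node->nodename);
    }

    OBJ_RELEASE(reloaded);
    return ORTE_SUCCESS;
}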

View file

@ -69,6 +69,8 @@ orte_rmaps_base_component_t mca_rmaps_proxy_component = {
*/
static orte_rmaps_base_module_t orte_rmaps_proxy = {
orte_rmaps_proxy_map,
orte_rmaps_base_get_job_map,
orte_rmaps_base_get_node_map,
orte_rmaps_proxy_finalize
};

View file

@ -58,6 +58,18 @@
*/
typedef int (*orte_rmaps_base_module_map_fn_t)(orte_jobid_t job, char *desired_mapper);
/**
* Get the map of a job from the registry
*/
typedef int (*orte_rmaps_base_module_get_job_map_fn_t)(orte_job_map_t **map, orte_jobid_t job);
/**
* Get the map for a job on a specific node from the registry. Providing a jobid of
* ORTE_JOBID_WILDCARD will return the map of all processes on that node
*/
typedef int (*orte_rmaps_base_module_get_node_map_fn_t)(orte_mapped_node_t **node, orte_cellid_t cell,
char *nodename, orte_jobid_t job);
/**
* Cleanup module resources.
*/
@ -67,10 +79,14 @@ typedef int (*orte_rmaps_base_module_finalize_fn_t)(void);
* rmaps module version 1.3.0
*/
struct orte_rmaps_base_module_1_3_0_t {
/** Maping function pointer */
orte_rmaps_base_module_map_fn_t map_job;
/** Mapping function pointer */
orte_rmaps_base_module_map_fn_t map_job;
/** Get job map pointer */
orte_rmaps_base_module_get_job_map_fn_t get_job_map;
/** Node map pointer */
orte_rmaps_base_module_get_node_map_fn_t get_node_map;
/** Finalization function pointer */
orte_rmaps_base_module_finalize_fn_t finalize;
orte_rmaps_base_module_finalize_fn_t finalize;
};
/** Convenience typedef */
typedef struct orte_rmaps_base_module_1_3_0_t orte_rmaps_base_module_1_3_0_t;
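The commit shows the provider side of the two new entry points (the base functions plugged into the proxy and round-robin module tables) but not a caller, so here is a small hedged sketch of querying one node through the selected module. The assumption that the caller owns and releases the returned object follows the get_job_map documentation rather than anything stated for get_node_map:

/* Illustrative sketch -- ask the selected RMAPS module for all procs on a node */
#include "orte/orte_constants.h"
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "orte/mca/rmaps/rmaps.h"

static int demo_query_node(orte_cellid_t cell, char *nodename)
{
    orte_mapped_node_t *node;
    int rc;

    /* ORTE_JOBID_WILDCARD returns the map of all processes on the node */
    rc = orte_rmaps.get_node_map(&node, cell, nodename, ORTE_JOBID_WILDCARD);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }

    opal_output(0, "%lu proc(s) currently mapped onto %s",
                (unsigned long)opal_list_get_size(&node->procs), nodename);

    /* assumption: the caller owns the returned object, as with get_job_map */
    OBJ_RELEASE(node);
    return ORTE_SUCCESS;
}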

View file

@ -24,11 +24,7 @@
#include "orte/orte_constants.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rmgr/rmgr_types.h"
/*
* General MAP types
@ -36,55 +32,51 @@
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**** JOB_MAP OBJECTS ***/
/*
* Mapped process info for job_map
*/
struct orte_mapped_proc_t {
opal_list_item_t super;
orte_process_name_t name; /* process name */
orte_std_cntr_t rank; /* process rank */
orte_std_cntr_t app_idx; /* index of app_context for this process */
pid_t pid;
};
typedef struct orte_mapped_proc_t orte_mapped_proc_t;
OBJ_CLASS_DECLARATION(orte_mapped_proc_t);
/*
* Mapping of nodes to process ranks.
*/
struct orte_rmaps_base_node_t {
struct orte_mapped_node_t {
opal_list_item_t super;
orte_ras_node_t* node;
opal_list_t node_procs; /* list of rmaps_base_proc_t */
orte_cellid_t cell; /* cell where this node is located */
char *nodename; /* name of node */
char *username;
orte_process_name_t *daemon; /* name of the daemon on this node
* NULL => daemon not assigned yet
*/
bool oversubscribed; /* whether or not the #procs > #processors */
opal_list_t procs; /* list of mapped_proc objects on this node */
};
typedef struct orte_rmaps_base_node_t orte_rmaps_base_node_t;
OBJ_CLASS_DECLARATION(orte_rmaps_base_node_t);
typedef struct orte_mapped_node_t orte_mapped_node_t;
OBJ_CLASS_DECLARATION(orte_mapped_node_t);
/*
* Mapping of a process rank to a specific node.
*/
struct orte_rmaps_base_proc_t {
opal_list_item_t super;
char *app; /* name of executable */
orte_rmaps_base_node_t* proc_node;
orte_process_name_t proc_name;
orte_std_cntr_t proc_rank;
pid_t pid; /* PLS-assigned pid */
pid_t local_pid; /* pid found by local process */
};
typedef struct orte_rmaps_base_proc_t orte_rmaps_base_proc_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_proc_t);
/*
* Structure that represents the mapping of an application to an
* Structure that represents the mapping of a job to an
* allocated set of resources.
*/
struct orte_rmaps_base_map_t {
opal_list_item_t super;
orte_app_context_t *app;
orte_rmaps_base_proc_t** procs;
orte_std_cntr_t num_procs;
opal_list_t nodes; /* list of rmaps_base_node_t */
struct orte_job_map_t {
opal_object_t super;
orte_jobid_t job;
orte_std_cntr_t num_apps; /* number of app_contexts */
orte_app_context_t **apps; /* the array of app_contexts for this job */
opal_list_t nodes; /* list of mapped_node_t */
};
typedef struct orte_rmaps_base_map_t orte_rmaps_base_map_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_map_t);
typedef struct orte_job_map_t orte_job_map_t;
OBJ_CLASS_DECLARATION(orte_job_map_t);
#if defined(c_plusplus) || defined(__cplusplus)
}
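Since the map is consumed by walking the nodes and their per-node proc lists (as the TotalView hookup further down does), a short traversal sketch may help; it is illustrative only and uses just the fields declared above:

/* Illustrative sketch -- walk map -> nodes -> procs, resolving app_idx */
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "orte/mca/rmgr/rmgr_types.h"
#include "orte/mca/rmaps/rmaps.h"

static void demo_dump_map(orte_job_map_t *map)
{
    opal_list_item_t *item, *item2;

    for (item = opal_list_get_first(&map->nodes);
         item != opal_list_get_end(&map->nodes);
         item = opal_list_get_next(item)) {
        orte_mapped_node_t *node = (orte_mapped_node_t*)item;

        opal_output(0, "node %s (cell %ld)%s", node->nodename,
                    (long)node->cell,
                    node->oversubscribed ? " [oversubscribed]" : "");

        for (item2 = opal_list_get_first(&node->procs);
             item2 != opal_list_get_end(&node->procs);
             item2 = opal_list_get_next(item2)) {
            orte_mapped_proc_t *proc = (orte_mapped_proc_t*)item2;
            orte_app_context_t *app = map->apps[proc->app_idx];

            opal_output(0, "  rank %ld runs %s", (long)proc->rank, app->app);
        }
    }
}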

View file

@ -31,6 +31,7 @@
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
#include "opal/util/trace.h"
#include "opal/util/show_help.h"
#include "opal/util/argv.h"
@ -56,7 +57,7 @@ static opal_list_t fully_used_nodes;
*/
static int map_app_by_node(
orte_app_context_t* app,
orte_rmaps_base_map_t* map,
orte_job_map_t* map,
orte_jobid_t jobid,
orte_vpid_t vpid_start,
opal_list_t* nodes,
@ -66,8 +67,9 @@ static int map_app_by_node(
orte_std_cntr_t num_alloc = 0;
opal_list_item_t *next;
orte_ras_node_t *node;
OPAL_TRACE(2);
/* This loop continues until all procs have been mapped or we run
out of resources. We determine that we have "run out of
resources" when all nodes have node_slots_max processes mapped to them,
@ -110,7 +112,7 @@ static int map_app_by_node(
/* Allocate a slot on this node */
node = (orte_ras_node_t*) cur_node_item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, num_alloc,
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, app->idx,
nodes, max_used_nodes))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -121,8 +123,6 @@ static int map_app_by_node(
cur_node_item = next;
}
map->num_procs = num_alloc;
return ORTE_SUCCESS;
}
@ -133,7 +133,7 @@ static int map_app_by_node(
*/
static int map_app_by_slot(
orte_app_context_t* app,
orte_rmaps_base_map_t* map,
orte_job_map_t* map,
orte_jobid_t jobid,
orte_vpid_t vpid_start,
opal_list_t* nodes,
@ -145,7 +145,8 @@ static int map_app_by_slot(
orte_ras_node_t *node;
opal_list_item_t *next;
OPAL_TRACE(2);
/* This loop continues until all procs have been mapped or we run
out of resources. We determine that we have "run out of
resources" when either all nodes have node_slots_max processes mapped to them,
@ -195,7 +196,7 @@ static int map_app_by_slot(
num_slots_to_take = (node->node_slots == 0) ? 1 : node->node_slots;
for( i = 0; i < num_slots_to_take; ++i) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, num_alloc,
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, app->idx,
nodes, max_used_nodes))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop
@ -223,8 +224,6 @@ static int map_app_by_slot(
}
map->num_procs = num_alloc;
return ORTE_SUCCESS;
}
@ -235,11 +234,10 @@ static int map_app_by_slot(
static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
{
orte_app_context_t** context, *app;
orte_rmaps_base_map_t* map;
orte_std_cntr_t i, num_context;
orte_app_context_t *app;
orte_job_map_t* map;
orte_std_cntr_t i;
opal_list_t master_node_list, mapped_node_list, max_used_nodes, *working_node_list;
opal_list_t mapping;
opal_list_item_t *item, *item2;
orte_ras_node_t *node, *node2;
orte_vpid_t vpid_start, job_vpid_start=0;
@ -247,8 +245,20 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
int rc;
bool bynode = true, modify_app_context = false;
OPAL_TRACE(1);
/* create the map object */
map = OBJ_NEW(orte_job_map_t);
if (NULL == map) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* set the jobid */
map->job = jobid;
/* query for the application context and allocated nodes */
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &context, &num_context))) {
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &(map->apps), &(map->num_apps)))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -271,11 +281,6 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
return rc;
}
/* construct a mapping for the job - the list will hold mappings for each
* application context
*/
OBJ_CONSTRUCT(&mapping, opal_list_t);
/** initialize the cur_node_item to point to the first node in the list */
cur_node_item = opal_list_get_first(&master_node_list);
@ -298,30 +303,20 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
/** construct a list to hold any nodes involved in a user-specified mapping */
OBJ_CONSTRUCT(&mapped_node_list, opal_list_t);
for(i=0; i<num_context; i++) {
app = context[i];
for(i=0; i < map->num_apps; i++) {
app = map->apps[i];
/** if the number of processes wasn't specified, then we know there can be only
* one app_context allowed in the launch, and that we are to launch it across
* all available slots. We'll double-check the single app_context rule first
*/
if (0 == app->num_procs && 1 < num_context) {
if (0 == app->num_procs && 1 < map->num_apps) {
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np",
true, num_context, NULL);
true, map->num_apps, NULL);
ORTE_ERROR_LOG(ORTE_ERR_INVALID_NUM_PROCS);
return ORTE_ERR_INVALID_NUM_PROCS;
}
/** create a map for this app_context */
map = OBJ_NEW(orte_rmaps_base_map_t);
if(NULL == map) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/** add it to the list of mappings for the job */
opal_list_append(&mapping, &map->super);
if ( 0 < app->num_map ) {
/** If the user has specified a mapping for this app_context, then we
* create a working node list that contains only those nodes.
@ -355,15 +350,6 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
}
}
map->app = app;
map->procs = (orte_rmaps_base_proc_t**)malloc(sizeof(orte_rmaps_base_proc_t*) * app->num_procs);
if(NULL == map->procs) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* allocate a vpid range for this app within the job */
if(ORTE_SUCCESS != (rc = orte_ns.reserve_range(jobid, app->num_procs, &vpid_start))) {
ORTE_ERROR_LOG(rc);
@ -467,7 +453,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
}
/* save mapping to the registry */
if(ORTE_SUCCESS != (rc = orte_rmaps_base_set_map(jobid, &mapping))) {
if(ORTE_SUCCESS != (rc = orte_rmaps_base_put_job_map(map))) {
goto cleanup;
}
@ -493,7 +479,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
processes
*/
if (modify_app_context) {
if (ORTE_SUCCESS != (rc = orte_rmgr.store_app_context(jobid, context, 1))) {
if (ORTE_SUCCESS != (rc = orte_rmgr.store_app_context(jobid, map->apps, 1))) {
ORTE_ERROR_LOG(rc);
}
}
@ -505,11 +491,6 @@ cleanup:
}
OBJ_DESTRUCT(&master_node_list);
while(NULL != (item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping);
OBJ_DESTRUCT(&max_used_nodes);
OBJ_DESTRUCT(&fully_used_nodes);
OBJ_DESTRUCT(&mapped_node_list);
@ -526,6 +507,8 @@ static int orte_rmaps_rr_finalize(void)
orte_rmaps_base_module_t orte_rmaps_round_robin_module = {
orte_rmaps_rr_map,
orte_rmaps_base_get_job_map,
orte_rmaps_base_get_node_map,
orte_rmaps_rr_finalize
};

View file

@ -62,6 +62,7 @@
#define ORTE_NODE_ALLOC_KEY "orte-node-alloc"
#define ORTE_NODE_BOOTPROXY_KEY "orte-node-bootproxy"
#define ORTE_NODE_USERNAME_KEY "orte-node-username"
#define ORTE_NODE_OVERSUBSCRIBED_KEY "orte-node-oversubscribed"
#define ORTE_JOB_APP_CONTEXT_KEY "orte-job-app-context"
#define ORTE_JOB_SLOTS_KEY "orte-job-slots" /**< number of procs in job */
#define ORTE_JOB_VPID_START_KEY "orte-job-vpid-start"

View file

@ -66,7 +66,7 @@ extern char **environ;
#include "opal/mca/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/runtime/runtime.h"
#include "totalview.h"
@ -333,8 +333,11 @@ void orte_totalview_init_before_spawn(void)
*/
void orte_totalview_init_after_spawn(orte_jobid_t jobid)
{
opal_list_t list_of_resource_maps;
opal_list_item_t *item;
orte_job_map_t *map;
opal_list_item_t *item, *item2;
orte_mapped_node_t *node;
orte_mapped_proc_t *proc;
orte_app_context_t *appctx;
orte_std_cntr_t i;
int rc;
@ -364,23 +367,18 @@ void orte_totalview_init_after_spawn(orte_jobid_t jobid)
MPIR_debug_state = 1;
OBJ_CONSTRUCT(&list_of_resource_maps, opal_list_t);
/* Get the resource map for this job */
/* Get a list of the resource maps for this job */
rc = orte_rmaps_base_get_map(jobid, &list_of_resource_maps);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
opal_output(0, "Error: Can't get list of resource maps\n");
opal_output(0, "Error: Can't get resource map\n");
ORTE_ERROR_LOG(rc);
}
/* find the total number of processes in the job */
for (item = opal_list_get_first(&list_of_resource_maps);
item != opal_list_get_end(&list_of_resource_maps);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t *map = (orte_rmaps_base_map_t*) item;
MPIR_proctable_size += map->num_procs;
for (i=0; i < map->num_apps; i++) {
MPIR_proctable_size += map->apps[i]->num_procs;
}
/* allocate MPIR_proctable */
@ -389,27 +387,34 @@ void orte_totalview_init_after_spawn(orte_jobid_t jobid)
MPIR_proctable_size);
if (MPIR_proctable == NULL) {
opal_output(0, "Error: Out of memory\n");
OBJ_DESTRUCT(&list_of_resource_maps);
OBJ_RELEASE(map);
}
/* initialize MPIR_proctable */
for (item = opal_list_get_first(&list_of_resource_maps);
item != opal_list_get_end(&list_of_resource_maps);
i=0;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t *map = (orte_rmaps_base_map_t*) item;
for (i = 0; i < map->num_procs; i++) {
orte_rmaps_base_proc_t *proc = map->procs[i];
MPIR_proctable[i].host_name = proc->proc_node->node->node_name;
node = (orte_mapped_node_t*)item;
for (item2 = opal_list_get_first(&node->procs);
item2 != opal_list_get_end(&node->procs);
item2 = opal_list_get_next(item2)) {
proc = (orte_mapped_proc_t*)item2;
appctx = map->apps[proc->app_idx];
MPIR_proctable[i].host_name = strdup(node->nodename);
MPIR_proctable[i].executable_name =
opal_os_path( false, map->app->cwd, proc->app, NULL );
MPIR_proctable[i].pid = proc->local_pid;
opal_os_path( false, appctx->cwd, appctx->app, NULL );
MPIR_proctable[i].pid = proc->pid;
i++;
}
}
OBJ_DESTRUCT(&list_of_resource_maps);
OBJ_RELEASE(map);
}
if (orte_debug_flag) {
dump();
}