Bring the map fixes into the main trunk. This should fix several problems, including the multiple app_context issue.
I have tested on rsh, slurm, bproc, and tm. Bproc continues to have a problem (will be asking for help there). Gridengine compiles but I cannot test (believe it likely will run). Poe and xgrid compile to the extent they can without the proper include files. This commit was SVN r12059.
This commit is contained in:
parent
5dbe5c7442
Commit
ae79894bad
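For reference, the core of this change is that each PLS component now obtains a single consolidated orte_job_map_t via orte_rmaps.get_job_map() instead of building an opal_list_t of per-app_context orte_rmaps_base_map_t objects with orte_rmaps_base_get_map(); that is what resolves the multiple app_context problem mentioned above. The following minimal sketch is not part of the commit: the helper name walk_job_map is hypothetical, and the field names num_apps, apps, nodes, nodename, and procs are taken from the diff below. It only illustrates how a launcher walks the new map.

#include "opal/class/opal_list.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/rmaps.h"

static int walk_job_map(orte_jobid_t jobid)
{
    orte_job_map_t *map;        /* one map now covers every app_context */
    opal_list_item_t *item;
    orte_std_cntr_t i;
    int rc;

    /* a single call replaces the old per-context mapping list */
    if (ORTE_SUCCESS != (rc = orte_rmaps.get_job_map(&map, jobid))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* app contexts are an array on the map, not separate map objects */
    for (i = 0; i < map->num_apps; i++) {
        /* per-context setup goes here, e.g. environment handling
         * for map->apps[i]->env as done in pls_bproc below */
    }

    /* nodes form one list shared by all app contexts */
    for (item = opal_list_get_first(&map->nodes);
         item != opal_list_get_end(&map->nodes);
         item = opal_list_get_next(item)) {
        orte_mapped_node_t *node = (orte_mapped_node_t*)item;
        /* node->nodename and the node->procs list are available directly */
        (void)node;
    }

    return ORTE_SUCCESS;
}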
@@ -259,7 +259,7 @@ int orte_errmgr_bproc_register_job(orte_jobid_t job)
}

/* send the request */
if (0 > orte_rml.send_buffer(orte_errmgr_proxy_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) {
if (0 > orte_rml.send_buffer(orte_errmgr_bproc_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(cmd);
return ORTE_ERR_COMM_FAILURE;

@@ -274,7 +274,7 @@ int orte_errmgr_bproc_register_job(orte_jobid_t job)
}

/* enter a blocking receive until we hear back */
if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_ERRMGR)) {
if (0 > orte_rml.recv_buffer(orte_errmgr_bproc_globals.replica, answer, ORTE_RML_TAG_ERRMGR)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
@@ -65,9 +65,14 @@ int orte_ns_base_print_name(char **output, char *prefix, orte_process_name_t *na
/* set default result */
*output = NULL;

asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: [%lu,%lu,%lu]",
if (NULL == name) {
asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: NULL",
(NULL == prefix ? " " : prefix));
} else {
asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: [%lu,%lu,%lu]",
(NULL == prefix ? " " : prefix), (unsigned long)name->cellid,
(unsigned long)name->jobid, (unsigned long)name->vpid);
}

return ORTE_SUCCESS;
}
@@ -137,15 +137,3 @@ int orte_odls_bproc_component_close(void)
OBJ_DESTRUCT(&mca_odls_bproc_component.children);
return ORTE_SUCCESS;
}

int orte_odls_bproc_component_finalize(void)
{
opal_list_item_t *item;

/* cleanup state */
while (NULL != (item = opal_list_remove_first(&mca_odls_bproc_component.children))) {
OBJ_RELEASE(item);
}

return ORTE_SUCCESS;
}
@@ -53,7 +53,7 @@ OBJ_CLASS_INSTANCE(orte_pls_daemon_info_t, /* type name */
/*
 * Store the active daemons for a job
 */
int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
int orte_pls_base_store_active_daemons(opal_list_t *daemons)
{
orte_pls_daemon_info_t *dmn;
opal_list_item_t *item;

@@ -63,6 +63,10 @@ int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)

/* determine the number of daemons */
num_daemons = opal_list_get_size(daemons);

if (0 == num_daemons) {
return ORTE_SUCCESS;
}

/* since each daemon gets recorded in a separate node's container,
 * we need to allocate space for num_daemons value objects

@@ -74,15 +78,6 @@ int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
}
memset(values, 0, num_daemons*sizeof(orte_gpr_value_t*)); /* NULL the array */

/* setup the key */
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, job))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
return rc;
}
asprintf(&key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string);
free(jobid_string);

/* loop through the values and the list and create all the value objects */
item = opal_list_get_first(daemons);
for (i=0; i < num_daemons; i++) {

@@ -102,6 +97,15 @@ int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
goto CLEANUP;
}

/* setup the key */
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, dmn->active_job))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
return rc;
}
asprintf(&key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string);
free(jobid_string);

if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[0]), key, ORTE_NAME, dmn->name))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;

@@ -140,7 +144,10 @@ int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job)
NULL
};
orte_cellid_t *cell;
char *nodename;
orte_process_name_t *name;
orte_pls_daemon_info_t *dmn;
bool found_name, found_node, found_cell;
int rc;

/* setup the key */

@@ -164,27 +171,29 @@ int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job)

/* loop through the answers and construct the list */
for (i=0; i < cnt; i++) {
/* each container should have only one set of values */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
if (NULL == dmn) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto CLEANUP;
}
/* for systems such as bproc, the node segment holds containers
 * for nodes that we may not have launched upon. Each container
 * will send us back a value object, so we have to ensure here
 * that we only create daemon objects on the list for those nodes
 * that DO provide a valid object
 */
found_name = found_node = found_cell = false;
for (j=0; j < values[i]->cnt; j++) {
kv = values[i]->keyvals[j];
if (0 == strcmp(kv->key, keys[0])) {
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), kv->value->data, ORTE_NAME))) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&name, kv->value, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
found_name = true;
continue;
}
if (0 == strcmp(kv->key, ORTE_NODE_NAME_KEY)) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->nodename), kv->value->data, ORTE_STRING))) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&nodename, kv->value, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
found_node = true;
continue;
}
if (0 == strcmp(kv->key, ORTE_CELLID_KEY)) {

@@ -192,12 +201,32 @@ int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job)
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
dmn->cell = *cell;
found_cell = true;
continue;
}
}
/* add this daemon to the list */
opal_list_append(daemons, &dmn->super);
/* if we found everything, then this is a valid entry - create
 * it and add it to the list
 */
if (found_name && found_node && found_cell) {
dmn = OBJ_NEW(orte_pls_daemon_info_t);
if (NULL == dmn) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(dmn);
goto CLEANUP;
}
dmn->cell = *cell;
if (NULL != nodename) {
dmn->nodename = strdup(nodename);
}
/* add this daemon to the list */
opal_list_append(daemons, &dmn->super);
}
OBJ_RELEASE(values[i]);
}

@@ -212,5 +241,23 @@ CLEANUP:
}

/*
 * Retrieve the active daemon(s) for a specific node
 * Remove a daemon from the world of active daemons
 */
int orte_pls_base_remove_daemon(orte_pls_daemon_info_t *info)
{
opal_list_t daemons;
int rc;

OBJ_CONSTRUCT(&daemons, opal_list_t);

/* We actually don't want to do this - instead, we need to do a registry
 * delete function call targeting this entry
 */
if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, info->active_job))) {
ORTE_ERROR_LOG(rc);
return rc;
}

/* find this item in the list */
return ORTE_SUCCESS;
}
@@ -58,6 +58,7 @@ int orte_pls_base_orted_exit(opal_list_t *daemons)
item = opal_list_get_next(item)) {
dmn = (orte_pls_daemon_info_t*)item;

opal_output(0, "sending exit cmd to daemon [%ld,%ld,%ld]", ORTE_NAME_ARGS(dmn->name));
if (0 > orte_rml.send_buffer(dmn->name, &cmd, ORTE_RML_TAG_PLS_ORTED, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_DESTRUCT(&cmd);
@@ -80,8 +80,9 @@ extern "C" {
int orte_pls_base_orted_add_local_procs(opal_list_t *daemons, orte_gpr_notify_data_t *ndat);

int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job);
int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job);

int orte_pls_base_store_active_daemons(opal_list_t *daemons);
int orte_pls_base_remove_daemon(orte_pls_daemon_info_t *info);

/*
 * communications utilities
 */
@@ -60,17 +60,14 @@
#include "orte/mca/ns/ns.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/ras/base/base.h"
#include "orte/mca/ras/ras.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/smr/smr.h"
#include "orte/runtime/orte_wait.h"
#include "orte/runtime/runtime.h"

/* remove this when moved to 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"

#include "orte/mca/pls/base/pls_private.h"
#include "pls_bproc.h"

@@ -104,7 +101,7 @@ orte_pls_base_module_t orte_pls_bproc_module = {
};

static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
static int orte_pls_bproc_node_array(orte_job_map_t* map,
int ** node_array, int * node_array_len);
static int orte_pls_bproc_node_list(int * node_array, int node_array_len,
int ** node_list, int * num_nodes,

@@ -123,12 +120,12 @@ static int bproc_vexecmove(int nnodes, int *nodes, int *pids, const char *cmd,
#endif
static void orte_pls_bproc_setup_env(char *** env);
static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
int ** node_arrays, int * node_array_lens,
int num_contexts, int num_procs,
orte_job_map_t *map,
orte_vpid_t global_vpid_start,
orte_jobid_t jobid, int* num_daemons);
static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
orte_rmaps_base_map_t* map, int num_processes,
orte_job_map_t* map, int num_processes,
int num_slots,
orte_vpid_t vpid_start,
orte_vpid_t global_vpid_start,
int app_context,

@@ -144,7 +141,7 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
 * @retval >=0 the number of processes
 * @retval <0 orte err
 */
static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
static int orte_pls_bproc_node_array(orte_job_map_t* map,
int ** node_array, int * node_array_len) {
opal_list_item_t* item;
int num_procs = 0;

@@ -156,8 +153,8 @@ static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
if(*node_array_len < atol(((orte_rmaps_base_node_t*)item)->node->node_name)) {
*node_array_len = atol(((orte_rmaps_base_node_t*)item)->node->node_name);
if(*node_array_len < atol(((orte_mapped_node_t*)item)->nodename)) {
*node_array_len = atol(((orte_mapped_node_t*)item)->nodename);
}
}
(*node_array_len)++;

@@ -172,9 +169,9 @@ static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_node_t* node = (orte_rmaps_base_node_t*)item;
num_on_node = opal_list_get_size(&node->node_procs);
(*node_array)[atol(node->node->node_name)] += num_on_node;
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
num_on_node = opal_list_get_size(&node->procs);
(*node_array)[atol(node->nodename)] += num_on_node;
num_procs += num_on_node;
}
return num_procs;
@@ -493,14 +490,12 @@ static void orte_pls_bproc_setup_env(char *** env)
 * @retval error
 */
static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
int ** node_arrays, int * node_array_lens,
int num_contexts, int num_procs,
orte_job_map_t *map,
orte_vpid_t global_vpid_start,
orte_jobid_t jobid, int *num_launched) {
int * daemon_list = NULL;
int num_nodes = 0;
int num_daemons = 0;
int rc, i, j;
int rc, i;
int * pids = NULL;
int argc;
char ** argv = NULL;

@@ -524,26 +519,25 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
 */
OBJ_CONSTRUCT(&daemons, opal_list_t);

/* find the length of the longest node array */
for(i = 0; i < num_contexts; i++) {
if(node_array_lens[i] > num_nodes) {
num_nodes = node_array_lens[i];
}
}
if(NULL == (daemon_list = (int*)malloc(sizeof(int) * num_nodes))) {
/* get the number of nodes in this job and allocate an array for
 * their names so we can pass that to bproc - populate the list
 * with the node names
 */
num_daemons = opal_list_get_size(&map->nodes);
if(NULL == (daemon_list = (int*)malloc(sizeof(int) * num_daemons))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup;
}
/* create a list of all the nodes that need daemons, which is all the nodes
 * that will have at least 1 process */
for(i = 0; i < num_nodes; i++) {
for(j = 0; j < num_contexts; j++) {
if(i < node_array_lens[j] && 0 < *(node_arrays[j] + i)) {
daemon_list[num_daemons++] = i;
break;
}
}
i = 0;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_mapped_node_t *node = (orte_mapped_node_t*)item;

daemon_list[i++] = atoi(node->nodename);
}

/* allocate storage to save the daemon pids */
if(NULL == (pids = (int*)malloc(sizeof(int) * num_daemons))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup;

@@ -574,7 +568,7 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,

/* set up the base environment so the daemons can get their names once launched */
rc = orte_ns_nds_bproc_put(cellid, daemon_jobid, daemon_vpid_start,
global_vpid_start, num_procs, envp);
global_vpid_start, num_daemons, envp);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;

@@ -695,7 +689,7 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
}
}
/* store the daemon info */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
*num_launched = num_daemons;
@@ -857,19 +851,20 @@ orte_pls_bproc_monitor_nodes(void)
 * @retval error
 */
static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
orte_rmaps_base_map_t* map, int num_processes,
orte_job_map_t* map, int num_processes, int num_slots,
orte_vpid_t vpid_start,
orte_vpid_t global_vpid_start,
int app_context, int * node_array,
int node_array_len) {
int * node_list = NULL;
int num_nodes, num_slots, cycle;
int num_nodes, cycle;
int rc, i, j, stride;
int * pids = NULL;
char * var, * param;
orte_process_name_t * proc_name;
struct bproc_io_t bproc_io[3];
orte_rmaps_base_node_t *node;
char **env;
int dbg;

OPAL_TRACE(1);

@@ -877,25 +872,16 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}

/* point to the env array for this app_context */
env = map->apps[app_context]->env;

/* set up app context */
asprintf(&param, "%d", app_context);
var = mca_base_param_environ_variable("pls", "bproc", "app_context");
opal_setenv(var, param, true, &map->app->env);
opal_setenv(var, param, true, &env);
free(param);
free(var);

/* in order for bproc processes to properly compute their name,
 * we have to provide them with info on the number of slots
 * on each node (which is a constant in bproc). We will pass this
 * in an appropriate parameter which we set for each app_context
 */
node = (orte_rmaps_base_node_t*)opal_list_get_first(&map->nodes);
if (NULL == node) {
ORTE_ERROR_LOG(ORTE_ERROR);
return ORTE_ERROR;
}
num_slots = node->node->node_slots;

/* set the vpid-to-vpid stride based on the mapping mode */
if (mca_pls_bproc_component.bynode) {

@@ -914,7 +900,7 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
/* and push that value into the process' environment */
asprintf(&param, "%ld", (long)stride);
var = mca_base_param_environ_variable("pls", "bproc", "stride");
opal_setenv(var, param, true, &map->app->env);
opal_setenv(var, param, true, &env);
free(param);
free(var);

@@ -943,11 +929,14 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
ORTE_ERROR_LOG(rc);
goto cleanup;
}
fprintf(stderr, "launching app %s\n", map->apps[app_context]->app);
while(0 != num_nodes) {
fprintf(stderr, "\tlaunching cycle %d\n", i);
for (dbg=0; dbg<num_nodes; dbg++) fprintf(stderr, "\t\tlaunching on node %d\n", node_list[dbg]);

/* setup environment so the procs can figure out their names */
rc = orte_ns_nds_bproc_put(cellid, jobid, vpid_start, global_vpid_start,
num_processes, &map->app->env);
num_processes, &env);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;

@@ -962,21 +951,22 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
opal_output(0, "pls_bproc: launching %d processes", num_nodes);
}
rc = bproc_vexecmove_io(num_nodes, node_list, pids, bproc_io, 3,
map->app->app, map->app->argv, map->app->env);
map->apps[app_context]->app,
map->apps[app_context]->argv, env);
if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "pls_bproc: %d processes launched. First pid: %d",
rc, *pids);
}
if(rc != num_nodes) {
opal_show_help("help-pls-bproc.txt", "proc-launch-number", true,
num_nodes, rc, map->app->app);
num_nodes, rc, map->apps[app_context]->app);
rc = ORTE_ERROR;
goto cleanup;
}
for(j = 0; j < num_nodes; j++) {
if(0 >= pids[j]) {
opal_show_help("help-pls-bproc.txt", "proc-launch-bad-pid", true,
node_list[j], pids[j], errno, map->app->app);
node_list[j], pids[j], errno, map->apps[app_context]->app);
rc = ORTE_ERROR;
ORTE_ERROR_LOG(rc);
goto cleanup;
@@ -1056,10 +1046,10 @@ cleanup:
 * @retval error
 */
int orte_pls_bproc_launch(orte_jobid_t jobid) {
opal_list_item_t* item, *item2;
opal_list_t mapping;
opal_list_item_t* item;
orte_cellid_t cellid;
orte_rmaps_base_map_t* map;
orte_job_map_t* map;
orte_mapped_node_t *map_node;
orte_vpid_t vpid_launch;
orte_vpid_t vpid_range;
orte_vpid_t vpid_start;

@@ -1068,11 +1058,13 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
int ** node_array = NULL;
int * node_array_len = NULL;
int num_processes = 0;
int num_daemons = 0;
int context = 0;
int j;
int num_daemons;
int num_slots;
int context;
int i, j;
orte_std_cntr_t idx;
char cwd_save[OMPI_PATH_MAX + 1];
orte_ras_node_t *ras_node;

OPAL_TRACE(1);

@@ -1089,12 +1081,12 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
}
cwd_save[sizeof(cwd_save) - 1] = '\0';

/* query for the application context and allocated nodes */
OBJ_CONSTRUCT(&mapping, opal_list_t);
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_map(jobid, &mapping))) {
/* get the job map */
if(ORTE_SUCCESS != (rc = orte_rmaps.get_job_map(&map, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}

if(ORTE_SUCCESS != (rc = orte_rmgr.get_vpid_range(jobid, &vpid_start,
&vpid_range))) {
ORTE_ERROR_LOG(rc);

@@ -1104,86 +1096,80 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
/* get the cellid */
cellid = orte_process_info.my_name->cellid;

/* do a large lock so the processes will not decrement the process count
 * until we are done launching */
/* check all of the app_contexts for sanity */
for (i=0; i < map->num_apps; i++) {
/* Check that the cwd is sane. We have to chdir there in
to check the executable, because the executable could
have been specified as a relative path to the wdir */
rc = orte_rmgr.check_context_cwd(map->apps[i], true);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}

for (item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
item = opal_list_get_next(item)) {
orte_std_cntr_t i;
map = (orte_rmaps_base_map_t*) item;

orte_dss.dump(0, map, ORTE_JOB_MAP);

for (i = 0; i < map->num_procs; ++i) {
orte_app_context_t *context = map->app;
/* Check that the app exists and is executable */
rc = orte_rmgr.check_context_app(map->apps[i]);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}

/* Check that the cwd is sane. We have to chdir there in
to check the executable, because the executable could
have been specified as a relative path to the wdir */
rc = orte_rmgr.check_context_cwd(context, true);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}

/* Check that the app exists and is executable */
rc = orte_rmgr.check_context_app(context);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}

/* Return to the original dir */
if (0 != chdir(cwd_save)) {
rc = ORTE_ERR_IN_ERRNO;
goto cleanup;
}
/* Return to the original dir */
if (0 != chdir(cwd_save)) {
rc = ORTE_ERR_IN_ERRNO;
goto cleanup;
}
}

/* For Bproc, we need to know how many slots were allocated on each
 * node so the spawned processes can computer their name. Only Bproc
 * needs to do this, so we choose not to modify the mapped_node struct
 * to hold this info - bproc can go get it.
 *
 * Since Bproc also requires that the slots allocated on each node
 * be the same, we really only need to lookup a single node. So grab
 * the data for the first node on the map
 */
map_node = (orte_mapped_node_t*)opal_list_get_first(&map->nodes);
if (NULL == (ras_node = orte_ras.node_lookup(map_node->cell, map_node->nodename))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
goto cleanup;
}
num_slots = ras_node->node_slots;
OBJ_RELEASE(ras_node);

if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "pls_bproc: --- starting to launch procs ---");
}

/* create an array to hold the pointers to the node arrays for each app
 * context. Also, create an array to hold the lengths of the node arrays */
node_array = malloc(opal_list_get_size(&mapping) * sizeof(int *));
node_array_len = malloc(opal_list_get_size(&mapping) * sizeof(int *));
node_array = malloc(map->num_apps * sizeof(int *));
node_array_len = malloc(map->num_apps * sizeof(int *));

/* for each application context - create a node array and setup its env */
for(item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
item = opal_list_get_next(item)) {
map = (orte_rmaps_base_map_t*)item;
rc = orte_pls_bproc_node_array(map, &node_array[context],
&node_array_len[context]);
for(i=0; i < map->num_apps; i++) {
rc = orte_pls_bproc_node_array(map, &node_array[i],
&node_array_len[i]);
if(0 > rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
orte_pls_bproc_setup_env(&map->app->env);
orte_pls_bproc_setup_env(&map->apps[i]->env);
num_processes += rc;
context++;
}

/* save the active node names */
idx = 0;
for (item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t *map = (orte_rmaps_base_map_t*) item;
for (item2 = opal_list_get_first(&map->nodes);
item2 != opal_list_get_end(&map->nodes);
item2 = opal_list_get_next(item2)) {
orte_ras_node_t* node = (orte_ras_node_t*) item2;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_mapped_node_t* node = (orte_mapped_node_t*) item;

rc = orte_pointer_array_add(&idx, mca_pls_bproc_component.active_node_names,
strdup(node->node_name));

}
rc = orte_pointer_array_add(&idx, mca_pls_bproc_component.active_node_names,
strdup(node->nodename));
}

/* setup subscription for each node so we can detect
when the node's state changes, usefull for aborting when
when the node's state changes, useful for aborting when
a bproc node up and dies */

rc = orte_pls_bproc_monitor_nodes();

@@ -1193,9 +1179,11 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
goto cleanup;
}

/* launch the daemons on all the nodes which have processes assign to them */
rc = orte_pls_bproc_launch_daemons(cellid, &map->app->env, node_array,
node_array_len, context, num_processes,
/* launch the daemons on all the nodes which have processes assigned to them.
 * We need to send along an appropriate environment for the daemons. Since
 * there must be at least ONE app_context, we can just take that one
 */
rc = orte_pls_bproc_launch_daemons(cellid, &map->apps[0]->env, map,
vpid_start, jobid, &num_daemons);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);

@@ -1238,44 +1226,35 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
}
}

context = 0;
vpid_launch = vpid_start;
opal_output(0, "launching apps");
/* for each application context launch the app */
for(item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
item = opal_list_get_next(item)) {
map = (orte_rmaps_base_map_t*)item;

rc = orte_rmgr.check_context_cwd(map->app, true);
for(context=0; context < map->num_apps; context++) {
rc = orte_rmgr.check_context_cwd(map->apps[context], true);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}

rc = orte_pls_bproc_launch_app(cellid, jobid, map, num_processes,
vpid_launch, vpid_start, map->app->idx,
rc = orte_pls_bproc_launch_app(cellid, jobid, map, num_processes, num_slots,
vpid_launch, vpid_start, context,
node_array[context], node_array_len[context]);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
free(node_array[context]);
context++;
vpid_launch = vpid_start + mca_pls_bproc_component.num_procs;
}

mca_pls_bproc_component.done_launching = true;
cleanup:
chdir(cwd_save);
while(NULL != (item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(item);
}
if(NULL != node_array) {
free(node_array);
}
if(NULL != node_array_len) {
free(node_array_len);
}
OBJ_DESTRUCT(&mapping);
return rc;
}
@@ -81,11 +81,9 @@
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/smr/smr.h"

/* clean up for ORTE 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"

#include "orte/mca/pls/pls.h"
#include "orte/mca/pls/base/pls_private.h"
#include "orte/mca/pls/gridengine/pls_gridengine.h"

@@ -104,21 +102,10 @@ orte_pls_base_module_t orte_pls_gridengine_module = {
orte_pls_gridengine_finalize
};

/**
 * struct used to have enough information to clean up the state of the
 * universe if a daemon aborts
 */
struct gridengine_daemon_info_t {
opal_object_t super;
orte_process_name_t *name;
char *nodename;
};
typedef struct gridengine_daemon_info_t gridengine_daemon_info_t;
static OBJ_CLASS_INSTANCE(gridengine_daemon_info_t,
opal_object_t,
NULL, NULL);
static void set_handler_default(int sig);
#if 0
static int update_slot_keyval(orte_ras_node_t* node, int* slot_cnt);
#endif

/**
 * Fill the orted_path variable with the directory to the orted

@@ -146,7 +133,7 @@ static int orte_pls_gridengine_fill_orted_path(char** orted_path)
 */
static void orte_pls_gridengine_wait_daemon(pid_t pid, int status, void* cbdata)
{
gridengine_daemon_info_t *info = (gridengine_daemon_info_t*) cbdata;
orte_pls_daemon_info_t *info = (orte_pls_daemon_info_t*) cbdata;
int rc;

/* if qrsh exited abnormally, set the daemon's state to aborted

@@ -204,16 +191,16 @@ static void orte_pls_gridengine_wait_daemon(pid_t pid, int status, void* cbdata)
 */
int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
{
opal_list_t mapping;
opal_list_item_t* m_item, *n_item;
orte_job_map_t *map;
opal_list_item_t *n_item;
orte_std_cntr_t num_nodes;
orte_vpid_t vpid;
int node_name_index1;
int node_name_index2;
int proc_name_index;
int orted_index;
int call_yield_index;
char *jobid_string;
char *prefix_dir;
char *uri, *param;
char **argv;
int argc;

@@ -229,26 +216,19 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
 */
OBJ_CONSTRUCT(&daemons, opal_list_t);

/* Query the list of nodes allocated and mapped to this job.
/* Get the map for this job.
 * We need the entire mapping for a couple of reasons:
 * - need the prefix to start with.
 * - need to know if we are launching on a subset of the allocated nodes
 * All other mapping responsibilities fall to orted in the fork PLS
 */
OBJ_CONSTRUCT(&mapping, opal_list_t);
rc = orte_rmaps_base_get_map(jobid, &mapping);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

num_nodes = 0;
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
num_nodes = (orte_std_cntr_t)opal_list_get_size(&map->nodes);

/*
 * Allocate a range of vpids for the daemons.

@@ -353,10 +333,6 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
free(uri);
free(param);

opal_argv_append(&argc, &argv, "--mpi-call-yield");
call_yield_index = argc;
opal_argv_append(&argc, &argv, "0");

if (mca_pls_gridengine_component.debug) {
param = opal_argv_join(argv, ' ');
if (NULL != param) {
@@ -368,332 +344,292 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)

/* Figure out the basenames for the libdir and bindir. There is a
lengthy comment about this in pls_rsh_module.c explaining all
the rationale for how / why we're doing this. */
the rationale for how / why we're doing this.
*/

lib_base = opal_basename(OPAL_LIBDIR);
bin_base = opal_basename(OPAL_BINDIR);

/*
 * Iterate through each of the contexts
/* See the note about prefix_dir in the orte/mca/pls/slurm/pls_slurm.c
 * module. Fo here, just note that we must have at least one app_context,
 * and we take the prefix_dir from that first one.
 */
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
char *prefix_dir = map->app->prefix_dir;
prefix_dir = map->apps[0]->prefix_dir;

/*
 * For each of the contexts - iterate through the nodes.
 */
for(n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
orte_ras_node_t* ras_node = rmaps_node->node;
orte_process_name_t* name;
pid_t pid;
char *exec_path, *orted_path;
char **exec_argv;
int remain_slot_cnt;
/*
 * Iterate through the nodes.
 */
for(n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_mapped_node_t* rmaps_node = (orte_mapped_node_t*)n_item;
orte_process_name_t* name;
pid_t pid;
char *exec_path, *orted_path;
char **exec_argv;
#if 0
int remain_slot_cnt;

/* RHC - I don't believe this code is really necessary any longer.
 * The mapper correctly accounts for slots that have already been
 * used. Even if another job starts to run between the time the
 * mapper maps this job and we get to this point, the new job
 * will have gone through the mapper and will not overuse the node.
 * As this code consumes considerable time, I have sliced it out
 * of the code for now.
 *
 * query the registry for the remaining gridengine slot count on
 * this node, and update the registry for the count for the
 * current process launch */
if (ORTE_SUCCESS != (rc =
update_slot_keyval(ras_node, &remain_slot_cnt))) {
ORTE_ERROR_LOG(rc);
return rc;
}

/* already launched on this node */
if(ras_node->node_launched++ != 0) {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: already launched on this node, %s",
ras_node->node_name);
}
continue;
}

/* query the registry for the remaining gridengine slot count on
 * this node, and update the registry for the count for the
 * current process launch */
if (ORTE_SUCCESS != (rc =
update_slot_keyval(ras_node, &remain_slot_cnt))) {
ORTE_ERROR_LOG(rc);
return rc;
}

/* check for the unlikely scenario, because gridengine ras already
 * checks for it, but still provide a check there. */
if (remain_slot_cnt < 0) {
opal_show_help("help-pls-gridengine.txt", "insufficient-pe-slot",
true, ras_node->node_name, true);
exit(-1); /* exit instead of return ORTE_ERR_OUT_OF_RESOURCE */
}

/* setup node name */
free(argv[node_name_index1]);
if (NULL != ras_node->node_username &&
0 != strlen (ras_node->node_username)) {
asprintf(&argv[node_name_index1], "%s@%s",
ras_node->node_username, ras_node->node_name);
} else {
argv[node_name_index1] = strdup(ras_node->node_name);
}

free(argv[node_name_index2]);
argv[node_name_index2] = strdup(ras_node->node_name);

/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, ras_node->node_cellid, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;
dmn->cell = ras_node->node_cellid;
dmn->nodename = strdup(ras_node->node_name);
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
opal_list_append(&daemons, &dmn->super);

#ifdef __WINDOWS__
printf("Unimplemented feature for windows\n");
return ORTE_ERR_NOT_IMPLEMENTED;
#else
/* fork a child to do qrsh */
pid = fork();
/* check for the unlikely scenario, because gridengine ras already
 * checks for it, but still provide a check there. */
if (remain_slot_cnt < 0) {
opal_show_help("help-pls-gridengine.txt", "insufficient-pe-slot",
true, ras_node->node_name, true);
exit(-1); /* exit instead of return ORTE_ERR_OUT_OF_RESOURCE */
}
#endif
if (pid < 0) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;

/* setup node name */
free(argv[node_name_index1]);
if (NULL != rmaps_node->username &&
0 != strlen (rmaps_node->username)) {
asprintf(&argv[node_name_index1], "%s@%s",
rmaps_node->username, rmaps_node->nodename);
} else {
argv[node_name_index1] = strdup(rmaps_node->nodename);
}

free(argv[node_name_index2]);
argv[node_name_index2] = strdup(rmaps_node->nodename);

/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, rmaps_node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

/* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid;
dmn->cell = rmaps_node->cell;
dmn->nodename = strdup(rmaps_node->nodename);
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
opal_list_append(&daemons, &dmn->super);

#ifdef __WINDOWS__
printf("Unimplemented feature for windows\n");
return ORTE_ERR_NOT_IMPLEMENTED;
#else
/* fork a child to do qrsh */
pid = fork();
#endif
if (pid < 0) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}

/* child */
if (pid == 0) {
char* name_string;
char** env;
char* var;
long fd, fdmax = sysconf(_SC_OPEN_MAX);

if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: launching on node %s",
rmaps_node->nodename);
}

/* child */
if (pid == 0) {
char* name_string;
char** env;
char* var;
long fd, fdmax = sysconf(_SC_OPEN_MAX);
/* setting exec_argv and exec_path for qrsh */
exec_argv = &argv[0];

if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: launching on node %s",
ras_node->node_name);
sge_root = getenv("SGE_ROOT");
sge_arch = getenv("ARC");
asprintf(&exec_path, "%s/bin/%s/qrsh", sge_root, sge_arch);
exec_path = opal_path_findv(exec_path, X_OK, environ, NULL);
if (NULL == exec_path) {
opal_show_help("help-pls-gridengine.txt", "bad-qrsh-path",
true, exec_path, sge_root, sge_arch);
return ORTE_ERR_NOT_FOUND;
}

if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: exec_argv[0]=%s, exec_path=%s",
exec_argv[0], exec_path);
}

/* setting orted_path for orted */
orted_path = opal_path_findv(exec_argv[orted_index], 0, environ, NULL);

if (NULL == orted_path && NULL == prefix_dir) {
rc = orte_pls_gridengine_fill_orted_path(&orted_path);
if (ORTE_SUCCESS != rc) {
return rc;
}

/* set the progress engine schedule for this node.
 * if node_slots is set to zero, then we default to
 * NOT being oversubscribed
 */
if (ras_node->node_slots > 0 &&
(orte_std_cntr_t)opal_list_get_size(&rmaps_node->node_procs) > ras_node->node_slots) {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
ras_node->node_slots, opal_list_get_size(&rmaps_node->node_procs));
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("1");
} else {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("0");
} else {
if (NULL != prefix_dir) {
orted_path = opal_os_path( false, prefix_dir, bin_base, "orted", NULL );
}

/* setting exec_argv and exec_path for qrsh */
exec_argv = &argv[0];

sge_root = getenv("SGE_ROOT");
sge_arch = getenv("ARC");
asprintf(&exec_path, "%s/bin/%s/qrsh", sge_root, sge_arch);
exec_path = opal_path_findv(exec_path, X_OK, environ, NULL);
if (NULL == exec_path) {
opal_show_help("help-pls-gridengine.txt", "bad-qrsh-path",
true, exec_path, sge_root, sge_arch);
return ORTE_ERR_NOT_FOUND;
}

if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: exec_argv[0]=%s, exec_path=%s",
exec_argv[0], exec_path);
}

/* setting orted_path for orted */
orted_path = opal_path_findv(exec_argv[orted_index], 0, environ, NULL);

if (NULL == orted_path && NULL == prefix_dir) {
/* If we yet did not fill up the orted_path, do so now */
if (NULL == orted_path) {
rc = orte_pls_gridengine_fill_orted_path(&orted_path);
if (ORTE_SUCCESS != rc) {
return rc;
}
} else {
if (NULL != prefix_dir) {
orted_path = opal_os_path( false, prefix_dir, bin_base, "orted", NULL );
}
/* If we yet did not fill up the orted_path, do so now */
if (NULL == orted_path) {
rc = orte_pls_gridengine_fill_orted_path(&orted_path);
if (ORTE_SUCCESS != rc) {
return rc;
}
}
}
asprintf(&argv[orted_index], orted_path);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: orted_path=%s", orted_path);
}
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. We're
already in the child process, so it's ok to modify
environ. */
if (NULL != prefix_dir) {
char *oldenv, *newenv;

/* Reset PATH */
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
oldenv = getenv("PATH");
if (NULL != oldenv) {
char *temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free( newenv );
newenv = temp;
}
opal_setenv("PATH", newenv, true, &environ);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: reset PATH: %s", newenv);
}
free(newenv);

/* Reset LD_LIBRARY_PATH */
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
oldenv = getenv("LD_LIBRARY_PATH");
if (NULL != oldenv) {
char* temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free(newenv);
newenv = temp;
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: reset LD_LIBRARY_PATH: %s",
newenv);
}
free(newenv);
}

var = getenv("HOME");
if (NULL != var) {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: changing to directory %s",
var);
}
/* Ignore errors -- what are we going to do?
(and we ignore errors on the remote nodes
in the fork pls, so this is consistent) */
chdir(var);
}

/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:gridengine: unable to create process name");
exit(-1);
}
free(argv[proc_name_index]);
argv[proc_name_index] = strdup(name_string);

if (!mca_pls_gridengine_component.debug) {
/* setup stdin */
int fd = open("/dev/null", O_RDWR, 0);
dup2(fd, 0);
close(fd);
}

/* close all file descriptors w/ exception of stdin/stdout/stderr */
for(fd=3; fd<fdmax; fd++)
close(fd);

/* Set signal handlers back to the default. Do this close
to the execve() because the event library may (and likely
will) reset them. If we don't do this, the event
library may have left some set that, at least on some
OS's, don't get reset via fork() or exec(). Hence, the
orted could be unkillable (for example). */

set_handler_default(SIGTERM);
set_handler_default(SIGINT);
#ifndef __WINDOWS__
set_handler_default(SIGHUP);
set_handler_default(SIGPIPE);
#endif
set_handler_default(SIGCHLD);

/* Unblock all signals, for many of the same reasons that
we set the default handlers, above. This is noticable
on Linux where the event library blocks SIGTERM, but we
don't want that blocked by the orted (or, more
specifically, we don't want it to be blocked by the
orted and then inherited by the ORTE processes that it
forks, making them unkillable by SIGTERM). */
#ifndef __WINDOWS__
sigprocmask(0, 0, &sigs);
sigprocmask(SIG_UNBLOCK, &sigs, 0);
#endif

/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);

/* exec the daemon */
if (mca_pls_gridengine_component.debug) {
param = opal_argv_join(exec_argv, ' ');
if (NULL != param) {
opal_output(0, "pls:gridengine: executing: %s", param);
free(param);
}
}
execve(exec_path, exec_argv, env);
opal_output(0, "pls:gridengine: execve failed with errno=%d\n", errno);
exit(-1);
} else { /* parent */
gridengine_daemon_info_t *daemon_info;

if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: parent");
}

/* setup callback on sigchild - wait until setup above is complete
 * as the callback can occur in the call to orte_wait_cb
 */
daemon_info = OBJ_NEW(gridengine_daemon_info_t);
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(daemon_info->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
}
daemon_info->nodename= strdup(ras_node->node_name);
orte_wait_cb(pid, orte_pls_gridengine_wait_daemon, daemon_info);

vpid++;
}
free(name);
asprintf(&argv[orted_index], orted_path);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: orted_path=%s", orted_path);
}

/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. We're
already in the child process, so it's ok to modify
environ. */
if (NULL != prefix_dir) {
char *oldenv, *newenv;

/* Reset PATH */
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
oldenv = getenv("PATH");
if (NULL != oldenv) {
char *temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free( newenv );
newenv = temp;
}
opal_setenv("PATH", newenv, true, &environ);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: reset PATH: %s", newenv);
}
free(newenv);

/* Reset LD_LIBRARY_PATH */
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
oldenv = getenv("LD_LIBRARY_PATH");
if (NULL != oldenv) {
char* temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free(newenv);
newenv = temp;
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: reset LD_LIBRARY_PATH: %s",
newenv);
}
free(newenv);
}

var = getenv("HOME");
if (NULL != var) {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: changing to directory %s",
var);
}
/* Ignore errors -- what are we going to do?
(and we ignore errors on the remote nodes
in the fork pls, so this is consistent) */
chdir(var);
}

/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:gridengine: unable to create process name");
exit(-1);
}
free(argv[proc_name_index]);
argv[proc_name_index] = strdup(name_string);

if (!mca_pls_gridengine_component.debug) {
/* setup stdin */
int fd = open("/dev/null", O_RDWR, 0);
dup2(fd, 0);
close(fd);
}

/* close all file descriptors w/ exception of stdin/stdout/stderr */
for(fd=3; fd<fdmax; fd++)
close(fd);

/* Set signal handlers back to the default. Do this close
to the execve() because the event library may (and likely
will) reset them. If we don't do this, the event
library may have left some set that, at least on some
OS's, don't get reset via fork() or exec(). Hence, the
orted could be unkillable (for example). */

set_handler_default(SIGTERM);
set_handler_default(SIGINT);
#ifndef __WINDOWS__
set_handler_default(SIGHUP);
set_handler_default(SIGPIPE);
#endif
set_handler_default(SIGCHLD);

/* Unblock all signals, for many of the same reasons that
we set the default handlers, above. This is noticable
on Linux where the event library blocks SIGTERM, but we
don't want that blocked by the orted (or, more
specifically, we don't want it to be blocked by the
orted and then inherited by the ORTE processes that it
forks, making them unkillable by SIGTERM). */
#ifndef __WINDOWS__
sigprocmask(0, 0, &sigs);
sigprocmask(SIG_UNBLOCK, &sigs, 0);
#endif

/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);

/* exec the daemon */
if (mca_pls_gridengine_component.debug) {
param = opal_argv_join(exec_argv, ' ');
if (NULL != param) {
opal_output(0, "pls:gridengine: executing: %s", param);
free(param);
}
}
execve(exec_path, exec_argv, env);
opal_output(0, "pls:gridengine: execve failed with errno=%d\n", errno);
exit(-1);
} else { /* parent */
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: parent");
}

/* setup callback on sigchild - wait until setup above is complete
 * as the callback can occur in the call to orte_wait_cb
 */
orte_wait_cb(pid, orte_pls_gridengine_wait_daemon, dmn);

vpid++;
}
free(name);
}

/* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}

cleanup:
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);

while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&daemons);

if (NULL != lib_base) {
free(lib_base);
}

@@ -707,6 +643,7 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
return rc;
}

#if 0
/**
 * Query the registry for the gridengine slot count, and update it
 */

@@ -808,6 +745,7 @@ static int update_slot_keyval(orte_ras_node_t* ras_node, int* slot_cnt)

return rc;
}
#endif

/**
 * Query the registry for all nodes participating in the job
@ -38,6 +38,7 @@
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/gpr/gpr.h"
|
||||
#include "orte/mca/ns/ns.h"
|
||||
#include "orte/mca/rmaps/rmaps.h"
|
||||
#include "orte/mca/rmgr/rmgr.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/smr/smr.h"
|
||||
@ -46,10 +47,7 @@
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
/* remove for ORTE 2.0 */
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
#include "orte/mca/sds/base/base.h"
|
||||
#include "orte/mca/rmgr/base/rmgr_private.h"
|
||||
|
||||
|
||||
#include "orte/mca/pls/pls.h"
|
||||
#include "orte/mca/pls/poe/pls_poe.h"
|
||||
@ -338,33 +336,34 @@ poe_wait_job - call back when POE finish
*/
static void poe_wait_job(pid_t pid, int status, void* cbdata)
{
opal_list_t map;
opal_list_item_t* item;
orte_job_map_t *map;
opal_list_item_t *item, *item2;
int rc;

/* query allocation for the job */
OBJ_CONSTRUCT(&map, opal_list_t);
rc = orte_rmaps_base_get_map(mca_pls_poe_component.jobid,&map);
rc = orte_rmaps.get_job_map(&map, mca_pls_poe_component.jobid);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}

for(item = opal_list_get_first(&map);
item != opal_list_get_end(&map);
for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item;
orte_std_cntr_t i;
orte_mapped_node_t* node = (orte_mapped_node_t*) item;

for(i = 0 ; i < map->num_procs ; ++i) {
orte_session_dir_finalize(&(map->procs[i])->proc_name);
rc = orte_smr.set_proc_state(&(map->procs[i]->proc_name),
for (item2 = opal_list_get_first(&node->procs);
item2 != opal_list_get_end(&node->procs);
item2 = opal_list_get_next(item2)) {
orte_mapped_proc_t* proc = (orte_mapped_proc_t*)item2;

orte_session_dir_finalize(&(proc->name));
rc = orte_smr.set_proc_state(&(proc->name),
ORTE_PROC_STATE_ABORTED, status);
}
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
}
}
OBJ_DESTRUCT(&map);
}
|
||||
|
||||
/**
|
||||
@ -379,7 +378,7 @@ poe_create_cmd_file - create POE command file
|
||||
static int poe_create_cmd_file(
|
||||
FILE *cfp,
|
||||
orte_app_context_t* context,
|
||||
orte_rmaps_base_proc_t* proc,
|
||||
orte_mapped_proc_t* proc,
|
||||
orte_vpid_t vpid_start,
|
||||
orte_vpid_t vpid_range)
|
||||
{
|
||||
@ -428,7 +427,7 @@ static int poe_create_cmd_file(
|
||||
free(uri);
|
||||
|
||||
/* push name into environment */
|
||||
orte_ns_nds_env_put(&proc->proc_name, vpid_start, vpid_range, &environ_copy);
|
||||
orte_ns_nds_env_put(&proc->name, vpid_start, vpid_range, &environ_copy);
|
||||
|
||||
if (context->argv == NULL) {
|
||||
context->argv = malloc(sizeof(char*)*2);
|
||||
@ -461,8 +460,8 @@ poe_launch_interactive - launch an interactive job
|
||||
*/
|
||||
static inline int poe_launch_interactive_job(orte_jobid_t jobid)
|
||||
{
|
||||
opal_list_t map, nodes, mapping_list;
|
||||
opal_list_item_t* item;
|
||||
orte_job_map_t *map;
|
||||
opal_list_item_t *item, *item2;
|
||||
orte_vpid_t vpid_start, vpid_range;
|
||||
orte_std_cntr_t num_nodes, num_procs;
|
||||
FILE *hfp, *cfp;
|
||||
@ -479,50 +478,46 @@ static inline int poe_launch_interactive_job(orte_jobid_t jobid)
|
||||
|
||||
mca_pls_poe_component.jobid = jobid;
|
||||
|
||||
OBJ_CONSTRUCT(&nodes, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
|
||||
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
|
||||
/* get the map for this job */
|
||||
rc = orte_rmaps.get_job_map(&map, jobid);
|
||||
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
|
||||
|
||||
num_nodes = opal_list_get_size(&nodes);
|
||||
|
||||
num_nodes = opal_list_get_size(&map->nodes);
|
||||
|
||||
if(!strncmp(mca_pls_poe_component.resource_allocation,"hostfile",8)) {
|
||||
|
||||
/* Create a temporary hostlist file if the user specifies one */
/* Create a temporary hostlist file if the user specifies one */
|
||||
|
||||
if( (NULL==(mca_pls_poe_component.hostfile=tempnam(NULL,NULL))) ||
|
||||
(NULL==(hfp=fopen(mca_pls_poe_component.hostfile,"w"))) ) {
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
for(item = opal_list_get_first(&nodes);
|
||||
item != opal_list_get_end(&nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_ras_node_t* node = (orte_ras_node_t*)item;
|
||||
fprintf(hfp,"%s\n",node->node_name);
|
||||
}
|
||||
fclose(hfp);
|
||||
if( (NULL==(mca_pls_poe_component.hostfile=tempnam(NULL,NULL))) ||
|
||||
(NULL==(hfp=fopen(mca_pls_poe_component.hostfile,"w"))) ) {
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
rc = orte_rmgr_base_get_job_slots(jobid, &num_procs);
|
||||
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
|
||||
|
||||
OBJ_CONSTRUCT(&map, opal_list_t);
|
||||
rc = orte_rmaps_base_get_map(jobid,&map);
|
||||
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
|
||||
for(item = opal_list_get_first(&map->nodes);
|
||||
item != opal_list_get_end(&map->nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
|
||||
fprintf(hfp,"%s\n",node->nodename);
|
||||
}
|
||||
fclose(hfp);
|
||||
|
||||
rc = orte_rmgr.get_vpid_range(jobid, &vpid_start, &vpid_range);
|
||||
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
|
||||
|
||||
/* Create a temporary POE command file */
|
||||
|
||||
for(item = opal_list_get_first(&map);
|
||||
item != opal_list_get_end(&map);
|
||||
num_procs = 0;
|
||||
for(item = opal_list_get_first(&map->nodes);
|
||||
item != opal_list_get_end(&map->nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_rmaps_base_map_t* map2 = (orte_rmaps_base_map_t*)item;
|
||||
orte_std_cntr_t i;
|
||||
for(i=0; i<map2->num_procs; i++) {
|
||||
rc = poe_create_cmd_file(cfp, map2->app, map2->procs[i], vpid_start, vpid_range);
|
||||
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
|
||||
|
||||
for (item2 = opal_list_get_first(&node->procs);
|
||||
item2 != opal_list_get_end(&node->procs);
|
||||
item2 = opal_list_get_next(item2)) {
|
||||
orte_mapped_proc_t* proc = (orte_mapped_proc_t*)item2;
|
||||
rc = poe_create_cmd_file(cfp, map->apps[proc->app_idx], proc, vpid_start, vpid_range);
|
||||
if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
|
||||
num_procs++;
|
||||
}
|
||||
}
|
||||
fclose(cfp);
|
||||
@ -587,20 +582,6 @@ static inline int poe_launch_interactive_job(orte_jobid_t jobid)
|
||||
|
||||
|
||||
cleanup:
|
||||
while(NULL != (item = opal_list_remove_first(&map))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&map);
|
||||
while(NULL != (item = opal_list_remove_first(&nodes))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
|
||||
while(NULL != (item = opal_list_remove_first(&mapping_list))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&mapping_list);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -81,7 +81,7 @@
|
||||
#include "orte/mca/gpr/gpr.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ras/ras_types.h"
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
#include "orte/mca/rmaps/rmaps.h"
|
||||
#include "orte/mca/smr/smr.h"
|
||||
|
||||
#include "orte/mca/pls/pls.h"
|
||||
@ -109,17 +109,6 @@ orte_pls_base_module_t orte_pls_rsh_module = {
|
||||
orte_pls_rsh_finalize
|
||||
};
|
||||
|
||||
/* struct used to have enough information to clean up the state of the
|
||||
universe if a daemon aborts */
|
||||
struct rsh_daemon_info_t {
|
||||
opal_object_t super;
|
||||
orte_ras_node_t* node;
|
||||
orte_jobid_t jobid;
|
||||
};
|
||||
typedef struct rsh_daemon_info_t rsh_daemon_info_t;
|
||||
static OBJ_CLASS_INSTANCE(rsh_daemon_info_t,
|
||||
opal_object_t,
|
||||
NULL, NULL);
|
||||
static void set_handler_default(int sig);
|
||||
|
||||
enum {
|
||||
@ -140,11 +129,15 @@ static const char * orte_pls_rsh_shell_name[] = {
|
||||
"unknown"
|
||||
};
|
||||
|
||||
/* local global storage of the list of active daemons */
|
||||
opal_list_t active_daemons;
|
||||
|
||||
|
||||
/**
|
||||
* Check the Shell variable on the specified node
|
||||
*/
|
||||
|
||||
static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell)
|
||||
static int orte_pls_rsh_probe(orte_mapped_node_t * node, orte_pls_rsh_shell * shell)
|
||||
{
|
||||
char ** argv;
|
||||
int argc, rc, nfds, i;
|
||||
@ -156,7 +149,7 @@ static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell
|
||||
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: going to check SHELL variable on node %s\n",
|
||||
node->node_name);
|
||||
node->nodename);
|
||||
}
|
||||
*shell = ORTE_PLS_RSH_SHELL_UNKNOWN;
|
||||
/*
|
||||
@ -164,7 +157,7 @@ static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell
|
||||
*/
|
||||
argv = opal_argv_copy(mca_pls_rsh_component.agent_argv);
|
||||
argc = mca_pls_rsh_component.agent_argc;
|
||||
opal_argv_append(&argc, &argv, node->node_name);
|
||||
opal_argv_append(&argc, &argv, node->nodename);
|
||||
opal_argv_append(&argc, &argv, "echo $SHELL");
|
||||
if (pipe(fd)) {
|
||||
opal_output(0, "pls:rsh: pipe failed with errno=%d\n", errno);
|
||||
@ -251,7 +244,7 @@ static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell
|
||||
}
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: node:%s has SHELL:%s\n",
|
||||
node->node_name, orte_pls_rsh_shell_name[*shell]);
|
||||
node->nodename, orte_pls_rsh_shell_name[*shell]);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
@ -283,9 +276,10 @@ static int orte_pls_rsh_fill_exec_path ( char ** exec_path)
|
||||
|
||||
static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
|
||||
{
|
||||
rsh_daemon_info_t *info = (rsh_daemon_info_t*) cbdata;
|
||||
opal_list_t map;
|
||||
opal_list_item_t* item;
|
||||
orte_pls_daemon_info_t *info = (orte_pls_daemon_info_t*) cbdata;
|
||||
orte_mapped_node_t *node;
|
||||
orte_mapped_proc_t *proc;
|
||||
opal_list_item_t *item;
|
||||
int rc;
|
||||
|
||||
/* if ssh exited abnormally, set the child processes to aborted
|
||||
@ -298,11 +292,8 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
|
||||
*/
|
||||
if (! WIFEXITED(status) || ! WEXITSTATUS(status) == 0) {
|
||||
/* get the mapping for our node so we can cancel the right things */
|
||||
OBJ_CONSTRUCT(&map, opal_list_t);
|
||||
rc = orte_rmaps_base_get_node_map(orte_process_info.my_name->cellid,
|
||||
info->jobid,
|
||||
info->node->node_name,
|
||||
&map);
|
||||
rc = orte_rmaps.get_node_map(&node, info->cell,
|
||||
info->nodename, info->active_job);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
@ -310,33 +301,30 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
|
||||
|
||||
/* set state of all processes associated with the daemon as
|
||||
terminated */
|
||||
for(item = opal_list_get_first(&map);
|
||||
item != opal_list_get_end(&map);
|
||||
for(item = opal_list_get_first(&node->procs);
|
||||
item != opal_list_get_end(&node->procs);
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item;
|
||||
orte_std_cntr_t i;
|
||||
proc = (orte_mapped_proc_t*) item;
|
||||
|
||||
for (i = 0 ; i < map->num_procs ; ++i) {
|
||||
/* Clean up the session directory as if we were the
|
||||
process itself. This covers the case where the
|
||||
process died abnormally and didn't cleanup its own
|
||||
session directory. */
|
||||
|
||||
orte_session_dir_finalize(&(map->procs[i])->proc_name);
|
||||
orte_session_dir_finalize(&(proc->name));
|
||||
|
||||
rc = orte_smr.set_proc_state(&(map->procs[i]->proc_name),
|
||||
rc = orte_smr.set_proc_state(&(proc->name),
|
||||
ORTE_PROC_STATE_ABORTED, status);
|
||||
}
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
}
|
||||
OBJ_DESTRUCT(&map);
|
||||
OBJ_RELEASE(node);
|
||||
|
||||
cleanup:
|
||||
/* tell the user something went wrong */
|
||||
opal_output(0, "ERROR: A daemon on node %s failed to start as expected.",
|
||||
info->node->node_name);
|
||||
info->nodename);
|
||||
opal_output(0, "ERROR: There may be more information available from");
|
||||
opal_output(0, "ERROR: the remote shell (see above).");
|
||||
|
||||
@ -361,6 +349,15 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
|
||||
|
||||
/* release any waiting threads */
|
||||
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
|
||||
/* tell the system that this daemon is gone */
|
||||
if (ORTE_SUCCESS != (rc = orte_pls_base_remove_daemon(info))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
/* remove the daemon from our local list */
|
||||
opal_list_remove_item(&active_daemons, &info->super);
|
||||
OBJ_RELEASE(info);
|
||||
|
||||
if (mca_pls_rsh_component.num_children-- >=
|
||||
mca_pls_rsh_component.num_concurrent ||
|
||||
mca_pls_rsh_component.num_children == 0) {
|
||||
@ -368,9 +365,6 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
|
||||
|
||||
/* cleanup */
|
||||
OBJ_RELEASE(info->node);
|
||||
OBJ_RELEASE(info);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -380,18 +374,19 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
|
||||
|
||||
int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
{
|
||||
opal_list_t mapping;
|
||||
opal_list_item_t* m_item, *n_item;
|
||||
orte_job_map_t *map;
|
||||
opal_list_item_t *n_item;
|
||||
orte_mapped_node_t *rmaps_node;
|
||||
orte_std_cntr_t num_nodes;
|
||||
orte_vpid_t vpid;
|
||||
int node_name_index1;
|
||||
int node_name_index2;
|
||||
int proc_name_index;
|
||||
int local_exec_index, local_exec_index_end;
|
||||
int call_yield_index;
|
||||
char *jobid_string;
|
||||
char *uri, *param;
|
||||
char **argv, **tmp;
|
||||
char *prefix_dir;
|
||||
int argc;
|
||||
int rc;
|
||||
sigset_t sigs;
|
||||
@ -399,34 +394,46 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
bool remote_bash = false, remote_csh = false;
|
||||
bool local_bash = false, local_csh = false;
|
||||
char *lib_base = NULL, *bin_base = NULL;
|
||||
opal_list_t daemons;
|
||||
orte_pls_daemon_info_t *dmn;
|
||||
|
||||
/* setup a list that will contain the info for all the daemons
|
||||
* so we can store it on the registry when done
|
||||
* so we can store it on the registry when done and use it
|
||||
* locally to track their state
|
||||
*/
|
||||
OBJ_CONSTRUCT(&daemons, opal_list_t);
|
||||
OBJ_CONSTRUCT(&active_daemons, opal_list_t);
|
||||
|
||||
/* Query the list of nodes allocated and mapped to this job.
|
||||
/* Get the map for this job
|
||||
* We need the entire mapping for a couple of reasons:
|
||||
* - need the prefix to start with.
|
||||
* - need to know if we are launching on a subset of the allocated nodes
|
||||
* - need to know the nodes we are launching on
|
||||
* All other mapping responsibilities fall to orted in the fork PLS
|
||||
*/
|
||||
OBJ_CONSTRUCT(&mapping, opal_list_t);
|
||||
rc = orte_rmaps_base_get_map(jobid, &mapping);
|
||||
rc = orte_rmaps.get_job_map(&map, jobid);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
num_nodes = 0;
|
||||
for(m_item = opal_list_get_first(&mapping);
|
||||
m_item != opal_list_get_end(&mapping);
|
||||
m_item = opal_list_get_next(m_item)) {
|
||||
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
|
||||
num_nodes += opal_list_get_size(&map->nodes);
|
||||
}
|
||||
num_nodes = (orte_std_cntr_t)opal_list_get_size(&map->nodes);
|
||||
|
||||
/*
|
||||
* After a discussion between Ralph & Jeff, we concluded that we
|
||||
* really are handling the prefix dir option incorrectly. It currently
|
||||
* is associated with an app_context, yet it really refers to the
|
||||
* location where OpenRTE/Open MPI is installed on a NODE. Fixing
|
||||
* this right now would involve significant change to orterun as well
|
||||
* as elsewhere, so we will intentionally leave this incorrect at this
|
||||
* point. The error, however, is identical to that seen in all prior
|
||||
* releases of OpenRTE/Open MPI, so our behavior is no worse than before.
|
||||
*
|
||||
* A note to fix this, along with ideas on how to do so, has been filed
|
||||
* on the project's Trac system under "feature enhancement".
|
||||
*
|
||||
* For now, default to the prefix_dir provided in the first app_context.
|
||||
* Since there always MUST be at least one app_context, we are safe in
|
||||
* doing this.
|
||||
*/
|
||||
prefix_dir = map->apps[0]->prefix_dir;
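/* Hypothetical illustration of the note above (example command line, not
 * part of this change): a launch such as
 *   mpirun --prefix /opt/ompi-A -np 2 app1 : --prefix /opt/ompi-B -np 2 app2
 * would use /opt/ompi-A, the first app_context's prefix, for every daemon
 * started by this launcher. */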
|
||||
|
||||
/*
|
||||
* Allocate a range of vpids for the daemons.
|
||||
*/
|
||||
@ -475,12 +482,8 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
}
|
||||
} else {
|
||||
orte_pls_rsh_shell shell;
|
||||
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)opal_list_get_first(&mapping);
|
||||
orte_rmaps_base_node_t* rmaps_node =
|
||||
(orte_rmaps_base_node_t*)opal_list_get_first(&map->nodes);
|
||||
orte_ras_node_t* node = rmaps_node->node;
|
||||
|
||||
rc = orte_pls_rsh_probe(node, &shell);
|
||||
rmaps_node = (orte_mapped_node_t*)opal_list_get_first(&map->nodes);
|
||||
rc = orte_pls_rsh_probe(rmaps_node, &shell);
|
||||
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -582,10 +585,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
free(uri);
|
||||
free(param);
|
||||
|
||||
opal_argv_append(&argc, &argv, "--mpi-call-yield");
|
||||
call_yield_index = argc;
|
||||
opal_argv_append(&argc, &argv, "0");
|
||||
|
||||
local_exec_index_end = argc;
|
||||
if (!(remote_csh || remote_bash)) {
|
||||
opal_argv_append(&argc, &argv, ")");
|
||||
@ -633,378 +632,337 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
|
||||
bin_base = opal_basename(OPAL_BINDIR);
|
||||
|
||||
/*
|
||||
* Iterate through each of the contexts
|
||||
* Iterate through each of the nodes
|
||||
*/
|
||||
for(m_item = opal_list_get_first(&mapping);
|
||||
m_item != opal_list_get_end(&mapping);
|
||||
m_item = opal_list_get_next(m_item)) {
|
||||
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
|
||||
char * prefix_dir = map->app->prefix_dir;
|
||||
for(n_item = opal_list_get_first(&map->nodes);
|
||||
n_item != opal_list_get_end(&map->nodes);
|
||||
n_item = opal_list_get_next(n_item)) {
|
||||
orte_process_name_t* name;
|
||||
pid_t pid;
|
||||
char *exec_path;
|
||||
char **exec_argv;
|
||||
|
||||
/*
|
||||
* For each of the contexts - iterate through the nodes.
|
||||
*/
|
||||
for(n_item = opal_list_get_first(&map->nodes);
|
||||
n_item != opal_list_get_end(&map->nodes);
|
||||
n_item = opal_list_get_next(n_item)) {
|
||||
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
|
||||
orte_ras_node_t* ras_node = rmaps_node->node;
|
||||
orte_process_name_t* name;
|
||||
pid_t pid;
|
||||
char *exec_path;
|
||||
char **exec_argv;
|
||||
rmaps_node = (orte_mapped_node_t*)n_item;
|
||||
|
||||
/* already launched on this node */
|
||||
if(ras_node->node_launched++ != 0)
|
||||
continue;
|
||||
/* new daemon - setup to record its info */
|
||||
dmn = OBJ_NEW(orte_pls_daemon_info_t);
|
||||
dmn->active_job = jobid;
|
||||
opal_list_append(&active_daemons, &dmn->super);
|
||||
|
||||
/* setup node name */
|
||||
free(argv[node_name_index1]);
|
||||
if (NULL != rmaps_node->username &&
|
||||
0 != strlen (rmaps_node->username)) {
|
||||
asprintf (&argv[node_name_index1], "%s@%s",
|
||||
rmaps_node->username, rmaps_node->nodename);
|
||||
} else {
|
||||
argv[node_name_index1] = strdup(rmaps_node->nodename);
|
||||
}
|
||||
|
||||
/* new daemon - setup to record its info */
|
||||
dmn = OBJ_NEW(orte_pls_daemon_info_t);
|
||||
opal_list_append(&daemons, &dmn->super);
|
||||
|
||||
/* setup node name */
|
||||
free(argv[node_name_index1]);
|
||||
if (NULL != ras_node->node_username &&
|
||||
0 != strlen (ras_node->node_username)) {
|
||||
asprintf (&argv[node_name_index1], "%s@%s",
|
||||
ras_node->node_username, ras_node->node_name);
|
||||
} else {
|
||||
argv[node_name_index1] = strdup(ras_node->node_name);
|
||||
free(argv[node_name_index2]);
|
||||
argv[node_name_index2] = strdup(rmaps_node->nodename);
|
||||
|
||||
/* save it in the daemon info */
|
||||
dmn->nodename = strdup(rmaps_node->nodename);
|
||||
|
||||
/* initialize daemons process name */
|
||||
rc = orte_ns.create_process_name(&name, rmaps_node->cell, 0, vpid);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* save it in the daemon info */
|
||||
dmn->cell = rmaps_node->cell;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* rsh a child to exec the rsh/ssh session */
|
||||
|
||||
/* set the process state to "launched" */
|
||||
if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(name, ORTE_PROC_STATE_LAUNCHED, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* child */
|
||||
if (pid == 0) {
|
||||
char* name_string;
|
||||
char** env;
|
||||
char* var;
|
||||
long fd, fdmax = sysconf(_SC_OPEN_MAX);
|
||||
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: launching on node %s\n",
|
||||
rmaps_node->nodename);
|
||||
}
|
||||
|
||||
free(argv[node_name_index2]);
|
||||
argv[node_name_index2] = strdup(ras_node->node_name);
|
||||
|
||||
/* save it in the daemon info */
|
||||
dmn->nodename = strdup(ras_node->node_name);
|
||||
|
||||
/* initialize daemons process name */
|
||||
rc = orte_ns.create_process_name(&name, ras_node->node_cellid, 0, vpid);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* save it in the daemon info */
|
||||
dmn->cell = ras_node->node_cellid;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* rsh a child to exec the rsh/ssh session */
|
||||
|
||||
/* set the process state to "launched" */
|
||||
if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(name, ORTE_PROC_STATE_LAUNCHED, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* child */
|
||||
if (pid == 0) {
|
||||
char* name_string;
|
||||
char** env;
|
||||
char* var;
|
||||
long fd, fdmax = sysconf(_SC_OPEN_MAX);
|
||||
/* We don't need to sense an oversubscribed condition and set the sched_yield
|
||||
* for the node as we are only launching the daemons at this time. The daemons
|
||||
* are now smart enough to set the oversubscribed condition themselves when
|
||||
* they launch the local procs.
|
||||
*/
|
||||
|
||||
/* Is this a local launch?
|
||||
*
|
||||
* Not all node names may be resolvable (if we found
|
||||
* localhost in the hostfile, for example). So first
|
||||
* check trivial case of node_name being same as the
|
||||
* current nodename, which must be local. If that doesn't
|
||||
* match, check using ifislocal().
|
||||
*/
|
||||
if (0 == strcmp(rmaps_node->nodename, orte_system_info.nodename) ||
|
||||
opal_ifislocal(rmaps_node->nodename)) {
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: launching on node %s\n",
|
||||
ras_node->node_name);
|
||||
opal_output(0, "pls:rsh: %s is a LOCAL node\n",
|
||||
rmaps_node->nodename);
|
||||
}
|
||||
exec_argv = &argv[local_exec_index];
|
||||
exec_path = opal_path_findv(exec_argv[0], 0, environ, NULL);
|
||||
|
||||
/* set the progress engine schedule for this node.
|
||||
* if node_slots is set to zero, then we default to
|
||||
* NOT being oversubscribed
|
||||
*/
|
||||
if (ras_node->node_slots > 0 &&
|
||||
(orte_std_cntr_t)opal_list_get_size(&rmaps_node->node_procs) > ras_node->node_slots) {
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
|
||||
ras_node->node_slots, opal_list_get_size(&rmaps_node->node_procs));
|
||||
if (NULL == exec_path && NULL == prefix_dir) {
|
||||
rc = orte_pls_rsh_fill_exec_path (&exec_path);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
free(argv[call_yield_index]);
|
||||
argv[call_yield_index] = strdup("1");
|
||||
} else {
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: not oversubscribed -- setting mpi_yield_when_idle to 0");
|
||||
if (NULL != prefix_dir) {
|
||||
exec_path = opal_os_path( false, prefix_dir, bin_base, "orted", NULL );
|
||||
}
|
||||
free(argv[call_yield_index]);
|
||||
argv[call_yield_index] = strdup("0");
|
||||
}
|
||||
|
||||
/* Is this a local launch?
|
||||
*
|
||||
* Not all node names may be resolvable (if we found
|
||||
* localhost in the hostfile, for example). So first
|
||||
* check trivial case of node_name being same as the
|
||||
* current nodename, which must be local. If that doesn't
|
||||
* match, check using ifislocal().
|
||||
*/
|
||||
if (0 == strcmp(ras_node->node_name, orte_system_info.nodename) ||
|
||||
opal_ifislocal(ras_node->node_name)) {
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: %s is a LOCAL node\n",
|
||||
ras_node->node_name);
|
||||
}
|
||||
exec_argv = &argv[local_exec_index];
|
||||
exec_path = opal_path_findv(exec_argv[0], 0, environ, NULL);
|
||||
|
||||
if (NULL == exec_path && NULL == prefix_dir) {
|
||||
/* If we have not yet filled in the exec path, do so now */
|
||||
if (NULL == exec_path) {
|
||||
rc = orte_pls_rsh_fill_exec_path (&exec_path);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
} else {
|
||||
if (NULL != prefix_dir) {
|
||||
exec_path = opal_os_path( false, prefix_dir, bin_base, "orted", NULL );
|
||||
}
|
||||
/* If we have not yet filled in the exec path, do so now */
|
||||
if (NULL == exec_path) {
|
||||
rc = orte_pls_rsh_fill_exec_path (&exec_path);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If we have a prefix, then modify the PATH and
|
||||
LD_LIBRARY_PATH environment variables. We're
|
||||
already in the child process, so it's ok to modify
|
||||
environ. */
|
||||
if (NULL != prefix_dir) {
|
||||
char *oldenv, *newenv;
|
||||
/* If we have a prefix, then modify the PATH and
|
||||
LD_LIBRARY_PATH environment variables. We're
|
||||
already in the child process, so it's ok to modify
|
||||
environ. */
|
||||
if (NULL != prefix_dir) {
|
||||
char *oldenv, *newenv;
|
||||
|
||||
/* Reset PATH */
|
||||
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
|
||||
oldenv = getenv("PATH");
|
||||
if (NULL != oldenv) {
|
||||
char *temp;
|
||||
asprintf(&temp, "%s:%s", newenv, oldenv );
|
||||
free( newenv );
|
||||
newenv = temp;
|
||||
}
|
||||
opal_setenv("PATH", newenv, true, &environ);
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: reset PATH: %s", newenv);
|
||||
}
|
||||
free(newenv);
|
||||
|
||||
/* Reset LD_LIBRARY_PATH */
|
||||
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
|
||||
oldenv = getenv("LD_LIBRARY_PATH");
|
||||
if (NULL != oldenv) {
|
||||
char* temp;
|
||||
asprintf(&temp, "%s:%s", newenv, oldenv);
|
||||
free(newenv);
|
||||
newenv = temp;
|
||||
}
|
||||
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: reset LD_LIBRARY_PATH: %s",
|
||||
newenv);
|
||||
}
|
||||
free(newenv);
|
||||
/* Reset PATH */
|
||||
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
|
||||
oldenv = getenv("PATH");
|
||||
if (NULL != oldenv) {
|
||||
char *temp;
|
||||
asprintf(&temp, "%s:%s", newenv, oldenv );
|
||||
free( newenv );
|
||||
newenv = temp;
|
||||
}
|
||||
|
||||
/* Since this is a local execution, we need to
|
||||
potentially whack the final ")" in the argv (if
|
||||
sh/csh conditionals, from above). Note that we're
|
||||
modifying the argv[] in the child process, so
|
||||
there's no need to save this and restore it
|
||||
afterward -- the parent's argv[] is unmodified. */
|
||||
if (NULL != argv[local_exec_index_end]) {
|
||||
free(argv[local_exec_index_end]);
|
||||
argv[local_exec_index_end] = NULL;
|
||||
}
|
||||
|
||||
/* Finally, chdir($HOME) because we're making the
|
||||
assumption that this is what will happen on
|
||||
remote nodes (via rsh/ssh). This allows a user
|
||||
to specify a path that is relative to $HOME for
|
||||
both the cwd and argv[0] and it will work on
|
||||
all nodes -- including the local host.
|
||||
Otherwise, it would work on remote nodes and
|
||||
not the local node. If the user does not start
|
||||
in $HOME on the remote nodes... well... let's
|
||||
hope they start in $HOME. :-) */
|
||||
var = getenv("HOME");
|
||||
if (NULL != var) {
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: changing to directory %s",
|
||||
var);
|
||||
}
|
||||
/* Ignore errors -- what are we going to do?
|
||||
(and we ignore errors on the remote nodes
|
||||
in the fork pls, so this is consistent) */
|
||||
chdir(var);
|
||||
}
|
||||
} else {
|
||||
opal_setenv("PATH", newenv, true, &environ);
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: %s is a REMOTE node\n",
|
||||
ras_node->node_name);
|
||||
opal_output(0, "pls:rsh: reset PATH: %s", newenv);
|
||||
}
|
||||
exec_argv = argv;
|
||||
exec_path = strdup(mca_pls_rsh_component.agent_path);
|
||||
free(newenv);
|
||||
|
||||
if (NULL != prefix_dir) {
|
||||
if (remote_bash) {
|
||||
asprintf (&argv[local_exec_index],
|
||||
"PATH=%s/%s:$PATH ; export PATH ; "
|
||||
"LD_LIBRARY_PATH=%s/%s:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; "
|
||||
"%s/%s/%s",
|
||||
prefix_dir, bin_base,
|
||||
prefix_dir, lib_base,
|
||||
prefix_dir, bin_base,
|
||||
mca_pls_rsh_component.orted);
|
||||
}
|
||||
if (remote_csh) {
|
||||
/* [t]csh is a bit more challenging -- we
|
||||
have to check whether LD_LIBRARY_PATH
|
||||
is already set before we try to set it.
|
||||
Must be very careful about obeying
|
||||
[t]csh's order of evaluation and not
|
||||
using a variable before it is defined.
|
||||
See this thread for more details:
|
||||
http://www.open-mpi.org/community/lists/users/2006/01/0517.php. */
|
||||
asprintf (&argv[local_exec_index],
|
||||
"set path = ( %s/%s $path ) ; "
|
||||
"if ( $?LD_LIBRARY_PATH == 1 ) "
|
||||
"set OMPI_have_llp ; "
|
||||
"if ( $?LD_LIBRARY_PATH == 0 ) "
|
||||
"setenv LD_LIBRARY_PATH %s/%s ; "
|
||||
"if ( $?OMPI_have_llp == 1 ) "
|
||||
"setenv LD_LIBRARY_PATH %s/%s:$LD_LIBRARY_PATH ; "
|
||||
"%s/%s/%s",
|
||||
prefix_dir, bin_base,
|
||||
prefix_dir, lib_base,
|
||||
prefix_dir, lib_base,
|
||||
prefix_dir, bin_base,
|
||||
mca_pls_rsh_component.orted);
|
||||
}
|
||||
/* Reset LD_LIBRARY_PATH */
|
||||
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
|
||||
oldenv = getenv("LD_LIBRARY_PATH");
|
||||
if (NULL != oldenv) {
|
||||
char* temp;
|
||||
asprintf(&temp, "%s:%s", newenv, oldenv);
|
||||
free(newenv);
|
||||
newenv = temp;
|
||||
}
|
||||
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: reset LD_LIBRARY_PATH: %s",
|
||||
newenv);
|
||||
}
|
||||
free(newenv);
|
||||
}
|
||||
|
||||
/* setup process name */
|
||||
rc = orte_ns.get_proc_name_string(&name_string, name);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
opal_output(0, "orte_pls_rsh: unable to create process name");
|
||||
exit(-1);
|
||||
}
|
||||
free(argv[proc_name_index]);
|
||||
argv[proc_name_index] = strdup(name_string);
|
||||
|
||||
if (!mca_pls_rsh_component.debug) {
|
||||
/* setup stdin */
|
||||
int fd = open("/dev/null", O_RDWR);
|
||||
dup2(fd, 0);
|
||||
close(fd);
|
||||
/* Since this is a local execution, we need to
|
||||
potentially whack the final ")" in the argv (if
|
||||
sh/csh conditionals, from above). Note that we're
|
||||
modifying the argv[] in the child process, so
|
||||
there's no need to save this and restore it
|
||||
afterward -- the parent's argv[] is unmodified. */
|
||||
if (NULL != argv[local_exec_index_end]) {
|
||||
free(argv[local_exec_index_end]);
|
||||
argv[local_exec_index_end] = NULL;
|
||||
}
|
||||
|
||||
/* close all file descriptors w/ exception of stdin/stdout/stderr */
|
||||
for(fd=3; fd<fdmax; fd++)
|
||||
close(fd);
|
||||
|
||||
/* Set signal handlers back to the default. Do this close
|
||||
to the execve() because the event library may (and likely
|
||||
will) reset them. If we don't do this, the event
|
||||
library may have left some set that, at least on some
|
||||
OS's, don't get reset via fork() or exec(). Hence, the
|
||||
orted could be unkillable (for example). */
|
||||
|
||||
set_handler_default(SIGTERM);
|
||||
set_handler_default(SIGINT);
|
||||
set_handler_default(SIGHUP);
|
||||
set_handler_default(SIGPIPE);
|
||||
set_handler_default(SIGCHLD);
|
||||
|
||||
/* Unblock all signals, for many of the same reasons that
|
||||
we set the default handlers, above. This is noticeable
|
||||
on Linux where the event library blocks SIGTERM, but we
|
||||
don't want that blocked by the orted (or, more
|
||||
specifically, we don't want it to be blocked by the
|
||||
orted and then inherited by the ORTE processes that it
|
||||
forks, making them unkillable by SIGTERM). */
|
||||
sigprocmask(0, 0, &sigs);
|
||||
sigprocmask(SIG_UNBLOCK, &sigs, 0);
|
||||
|
||||
/* setup environment */
|
||||
env = opal_argv_copy(environ);
|
||||
var = mca_base_param_environ_variable("seed",NULL,NULL);
|
||||
opal_setenv(var, "0", true, &env);
|
||||
|
||||
/* exec the daemon */
|
||||
/* Finally, chdir($HOME) because we're making the
|
||||
assumption that this is what will happen on
|
||||
remote nodes (via rsh/ssh). This allows a user
|
||||
to specify a path that is relative to $HOME for
|
||||
both the cwd and argv[0] and it will work on
|
||||
all nodes -- including the local host.
|
||||
Otherwise, it would work on remote nodes and
|
||||
not the local node. If the user does not start
|
||||
in $HOME on the remote nodes... well... let's
|
||||
hope they start in $HOME. :-) */
|
||||
var = getenv("HOME");
|
||||
if (NULL != var) {
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
opal_output(0, "pls:rsh: changing to directory %s",
|
||||
var);
|
||||
}
|
||||
/* Ignore errors -- what are we going to do?
|
||||
(and we ignore errors on the remote nodes
|
||||
in the fork pls, so this is consistent) */
|
||||
chdir(var);
|
||||
}
|
||||
} else {
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
param = opal_argv_join(exec_argv, ' ');
|
||||
if (NULL != param) {
|
||||
opal_output(0, "pls:rsh: executing: %s", param);
|
||||
free(param);
|
||||
opal_output(0, "pls:rsh: %s is a REMOTE node\n",
|
||||
rmaps_node->nodename);
|
||||
}
|
||||
exec_argv = argv;
|
||||
exec_path = strdup(mca_pls_rsh_component.agent_path);
|
||||
|
||||
if (NULL != prefix_dir) {
|
||||
if (remote_bash) {
|
||||
asprintf (&argv[local_exec_index],
|
||||
"PATH=%s/%s:$PATH ; export PATH ; "
|
||||
"LD_LIBRARY_PATH=%s/%s:$LD_LIBRARY_PATH ; export LD_LIBRARY_PATH ; "
|
||||
"%s/%s/%s",
|
||||
prefix_dir, bin_base,
|
||||
prefix_dir, lib_base,
|
||||
prefix_dir, bin_base,
|
||||
mca_pls_rsh_component.orted);
|
||||
}
|
||||
if (remote_csh) {
|
||||
/* [t]csh is a bit more challenging -- we
|
||||
have to check whether LD_LIBRARY_PATH
|
||||
is already set before we try to set it.
|
||||
Must be very careful about obeying
|
||||
[t]csh's order of evaluation and not
|
||||
using a variable before it is defined.
|
||||
See this thread for more details:
|
||||
http://www.open-mpi.org/community/lists/users/2006/01/0517.php. */
|
||||
asprintf (&argv[local_exec_index],
|
||||
"set path = ( %s/%s $path ) ; "
|
||||
"if ( $?LD_LIBRARY_PATH == 1 ) "
|
||||
"set OMPI_have_llp ; "
|
||||
"if ( $?LD_LIBRARY_PATH == 0 ) "
|
||||
"setenv LD_LIBRARY_PATH %s/%s ; "
|
||||
"if ( $?OMPI_have_llp == 1 ) "
|
||||
"setenv LD_LIBRARY_PATH %s/%s:$LD_LIBRARY_PATH ; "
|
||||
"%s/%s/%s",
|
||||
prefix_dir, bin_base,
|
||||
prefix_dir, lib_base,
|
||||
prefix_dir, lib_base,
|
||||
prefix_dir, bin_base,
|
||||
mca_pls_rsh_component.orted);
|
||||
}
|
||||
}
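/* Illustrative expansion (hypothetical values: prefix_dir=/opt/ompi,
 * bin_base="bin", lib_base="lib", and an orted agent named "orted"):
 * the [t]csh preamble built above would read roughly:
 *   set path = ( /opt/ompi/bin $path ) ;
 *   if ( $?LD_LIBRARY_PATH == 1 ) set OMPI_have_llp ;
 *   if ( $?LD_LIBRARY_PATH == 0 ) setenv LD_LIBRARY_PATH /opt/ompi/lib ;
 *   if ( $?OMPI_have_llp == 1 ) setenv LD_LIBRARY_PATH /opt/ompi/lib:$LD_LIBRARY_PATH ;
 *   /opt/ompi/bin/orted
 */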
|
||||
execve(exec_path, exec_argv, env);
|
||||
opal_output(0, "pls:rsh: execv failed with errno=%d\n", errno);
|
||||
exit(-1);
|
||||
|
||||
} else { /* father */
|
||||
rsh_daemon_info_t *daemon_info;
|
||||
|
||||
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
|
||||
/* JJH Bug:
|
||||
* If we are in '--debug-daemons' we keep the ssh connection
|
||||
* alive for the span of the run. If we use this option
|
||||
* AND we launch on more than "num_concurrent" machines
|
||||
* then we will deadlock. No connections are terminated
|
||||
* until the job is complete, no job is started
|
||||
* since all the orteds are waiting for all the others
|
||||
* to come online, and the others are not launched because
|
||||
* we are waiting on those that have started to terminate
|
||||
* their ssh tunnels. :(
|
||||
*/
|
||||
if (mca_pls_rsh_component.num_children++ >=
|
||||
mca_pls_rsh_component.num_concurrent) {
|
||||
opal_condition_wait(&mca_pls_rsh_component.cond, &mca_pls_rsh_component.lock);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
|
||||
|
||||
/* setup callback on sigchild - wait until setup above is complete
|
||||
* as the callback can occur in the call to orte_wait_cb
|
||||
*/
|
||||
daemon_info = OBJ_NEW(rsh_daemon_info_t);
|
||||
OBJ_RETAIN(ras_node);
|
||||
daemon_info->node = ras_node;
|
||||
daemon_info->jobid = jobid;
|
||||
orte_wait_cb(pid, orte_pls_rsh_wait_daemon, daemon_info);
|
||||
|
||||
/* if required - add delay to avoid problems w/ X11 authentication */
|
||||
if (mca_pls_rsh_component.debug && mca_pls_rsh_component.delay) {
|
||||
sleep(mca_pls_rsh_component.delay);
|
||||
}
|
||||
vpid++;
|
||||
}
|
||||
free(name);
|
||||
|
||||
/* setup process name */
|
||||
rc = orte_ns.get_proc_name_string(&name_string, name);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
opal_output(0, "orte_pls_rsh: unable to create process name");
|
||||
exit(-1);
|
||||
}
|
||||
free(argv[proc_name_index]);
|
||||
argv[proc_name_index] = strdup(name_string);
|
||||
|
||||
if (!mca_pls_rsh_component.debug) {
|
||||
/* setup stdin */
|
||||
int fd = open("/dev/null", O_RDWR);
|
||||
dup2(fd, 0);
|
||||
close(fd);
|
||||
}
|
||||
|
||||
/* close all file descriptors w/ exception of stdin/stdout/stderr */
|
||||
for(fd=3; fd<fdmax; fd++)
|
||||
close(fd);
|
||||
|
||||
/* Set signal handlers back to the default. Do this close
|
||||
to the execve() because the event library may (and likely
|
||||
will) reset them. If we don't do this, the event
|
||||
library may have left some set that, at least on some
|
||||
OS's, don't get reset via fork() or exec(). Hence, the
|
||||
orted could be unkillable (for example). */
|
||||
|
||||
set_handler_default(SIGTERM);
|
||||
set_handler_default(SIGINT);
|
||||
set_handler_default(SIGHUP);
|
||||
set_handler_default(SIGPIPE);
|
||||
set_handler_default(SIGCHLD);
|
||||
|
||||
/* Unblock all signals, for many of the same reasons that
|
||||
we set the default handlers, above. This is noticeable
|
||||
on Linux where the event library blocks SIGTERM, but we
|
||||
don't want that blocked by the orted (or, more
|
||||
specifically, we don't want it to be blocked by the
|
||||
orted and then inherited by the ORTE processes that it
|
||||
forks, making them unkillable by SIGTERM). */
|
||||
sigprocmask(0, 0, &sigs);
|
||||
sigprocmask(SIG_UNBLOCK, &sigs, 0);
|
||||
|
||||
/* setup environment */
|
||||
env = opal_argv_copy(environ);
|
||||
var = mca_base_param_environ_variable("seed",NULL,NULL);
|
||||
opal_setenv(var, "0", true, &env);
|
||||
|
||||
/* exec the daemon */
|
||||
if (mca_pls_rsh_component.debug) {
|
||||
param = opal_argv_join(exec_argv, ' ');
|
||||
if (NULL != param) {
|
||||
opal_output(0, "pls:rsh: executing: %s", param);
|
||||
free(param);
|
||||
}
|
||||
}
|
||||
execve(exec_path, exec_argv, env);
|
||||
opal_output(0, "pls:rsh: execv failed with errno=%d\n", errno);
|
||||
exit(-1);
|
||||
|
||||
} else { /* father */
|
||||
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
|
||||
/* JJH Bug:
|
||||
* If we are in '--debug-daemons' we keep the ssh connection
|
||||
* alive for the span of the run. If we use this option
|
||||
* AND we launch on more than "num_concurrent" machines
|
||||
* then we will deadlock. No connections are terminated
|
||||
* until the job is complete, no job is started
|
||||
* since all the orteds are waiting for all the others
|
||||
* to come online, and the others are not launched because
|
||||
* we are waiting on those that have started to terminate
|
||||
* their ssh tunnels. :(
|
||||
*/
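/* Numeric sketch of the scenario described above (hypothetical values):
 * with num_concurrent = 8, --debug-daemons, and 16 nodes to launch, the
 * first 8 ssh sessions stay open for the life of the job, the remaining
 * 8 daemons are never started, and the job never begins. */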
|
||||
if (mca_pls_rsh_component.num_children++ >=
|
||||
mca_pls_rsh_component.num_concurrent) {
|
||||
opal_condition_wait(&mca_pls_rsh_component.cond, &mca_pls_rsh_component.lock);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
|
||||
|
||||
/* setup callback on sigchild - wait until setup above is complete
|
||||
* as the callback can occur in the call to orte_wait_cb
|
||||
*/
|
||||
orte_wait_cb(pid, orte_pls_rsh_wait_daemon, dmn);
|
||||
|
||||
/* if required - add delay to avoid problems w/ X11 authentication */
|
||||
if (mca_pls_rsh_component.debug && mca_pls_rsh_component.delay) {
|
||||
sleep(mca_pls_rsh_component.delay);
|
||||
}
|
||||
vpid++;
|
||||
}
|
||||
free(name);
|
||||
}
|
||||
|
||||
/* all done, so store the daemon info on the registry */
|
||||
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&active_daemons))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
|
||||
OBJ_RELEASE(m_item);
|
||||
}
|
||||
OBJ_DESTRUCT(&mapping);
|
||||
|
||||
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
|
||||
OBJ_RELEASE(m_item);
|
||||
}
|
||||
OBJ_DESTRUCT(&daemons);
|
||||
/* OBJ_RELEASE(map); */
|
||||
|
||||
if (NULL != lib_base) {
|
||||
free(lib_base);
|
||||
|
@ -59,7 +59,7 @@
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/smr/smr.h"
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
#include "orte/mca/rmaps/rmaps.h"
|
||||
|
||||
#include "orte/mca/pls/pls.h"
|
||||
#include "orte/mca/pls/base/pls_private.h"
|
||||
@ -109,8 +109,8 @@ extern char **environ;

static int pls_slurm_launch_job(orte_jobid_t jobid)
{
opal_list_t nodes, mapping_list;
opal_list_item_t *item, *item2;
orte_job_map_t *map;
opal_list_item_t *item;
size_t num_nodes;
orte_vpid_t vpid;
char *jobid_string;
@ -137,15 +137,13 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
*/
OBJ_CONSTRUCT(&daemons, opal_list_t);

/* Query the list of nodes allocated and mapped to this job.
/* Query the map for this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS
*/
OBJ_CONSTRUCT(&nodes, opal_list_t);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
@ -153,7 +151,7 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
/*
* Allocate a range of vpids for the daemons.
*/
num_nodes = opal_list_get_size(&nodes);
num_nodes = opal_list_get_size(&map->nodes);
if (num_nodes == 0) {
return ORTE_ERR_BAD_PARAM;
}
@ -206,12 +204,12 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
nodelist_argv = NULL;
nodelist_argc = 0;

for (item = opal_list_get_first(&nodes);
item != opal_list_get_end(&nodes);
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
orte_mapped_node_t* node = (orte_mapped_node_t*)item;

opal_argv_append(&nodelist_argc, &nodelist_argv, node->node_name);
opal_argv_append(&nodelist_argc, &nodelist_argv, node->nodename);
}
nodelist_flat = opal_argv_join(nodelist_argv, ',');
asprintf(&tmp, "--nodelist=%s", nodelist_flat);
|
||||
@ -308,80 +306,59 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
|
||||
}
|
||||
}
|
||||
|
||||
/* Bookkeeping -- save the node names */
|
||||
/* Copy the prefix-directory specified in the
|
||||
corresponding app_context. If there are multiple,
|
||||
different prefix's in the app context, complain (i.e., only
|
||||
allow one --prefix option for the entire slurm run -- we
|
||||
don't support different --prefix'es for different nodes in
|
||||
the SLURM pls) */
|
||||
cur_prefix = NULL;
|
||||
for (item = opal_list_get_first(&nodes);
|
||||
item != opal_list_get_end(&nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_ras_node_t* node = (orte_ras_node_t*)item;
|
||||
opal_list_t map;
|
||||
size_t num_processes;
|
||||
for (i=0; i < map->num_apps; i++) {
|
||||
char * app_prefix_dir = map->apps[i]->prefix_dir;
|
||||
/* Check for already set cur_prefix_dir -- if different,
|
||||
complain */
|
||||
if (NULL != app_prefix_dir) {
|
||||
if (NULL != cur_prefix &&
|
||||
0 != strcmp (cur_prefix, app_prefix_dir)) {
|
||||
opal_show_help("help-pls-slurm.txt", "multiple-prefixes",
|
||||
true, cur_prefix, app_prefix_dir);
|
||||
return ORTE_ERR_FATAL;
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&map, opal_list_t);
|
||||
/* Get the mapping of this very node */
|
||||
rc = orte_rmaps_base_get_node_map(orte_process_info.my_name->cellid,
|
||||
jobid,
|
||||
node->node_name,
|
||||
&map);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Copy the prefix-directory specified within the
|
||||
corresponding app_context. If there are multiple,
|
||||
different prefix's in the app context, complain (i.e., only
|
||||
allow one --prefix option for the entire slurm run -- we
|
||||
don't support different --prefix'es for different nodes in
|
||||
the SLURM pls) */
|
||||
num_processes = 0;
|
||||
for (item2 = opal_list_get_first(&map);
|
||||
item2 != opal_list_get_end(&map);
|
||||
item2 = opal_list_get_next(item2)) {
|
||||
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item2;
|
||||
char * app_prefix_dir = map->app->prefix_dir;
|
||||
|
||||
/* Increment the number of processes allocated to this node
|
||||
* This allows us to accurately test for oversubscription */
|
||||
num_processes += map->num_procs;
|
||||
|
||||
/* Check for already set cur_prefix_dir -- if different,
|
||||
complain */
|
||||
if (NULL != app_prefix_dir) {
|
||||
if (NULL != cur_prefix &&
|
||||
0 != strcmp (cur_prefix, app_prefix_dir)) {
|
||||
opal_show_help("help-pls-slurm.txt", "multiple-prefixes",
|
||||
true, cur_prefix, app_prefix_dir);
|
||||
return ORTE_ERR_FATAL;
|
||||
}
|
||||
|
||||
/* If not yet set, copy it; iff set, then it's the
|
||||
same anyway */
|
||||
if (NULL == cur_prefix) {
|
||||
cur_prefix = strdup(map->app->prefix_dir);
|
||||
if (mca_pls_slurm_component.debug) {
|
||||
opal_output (0, "pls:slurm: Set prefix:%s",
|
||||
cur_prefix);
|
||||
}
|
||||
/* If not yet set, copy it; iff set, then it's the
|
||||
same anyway */
|
||||
if (NULL == cur_prefix) {
|
||||
cur_prefix = strdup(app_prefix_dir);
|
||||
if (mca_pls_slurm_component.debug) {
|
||||
opal_output (0, "pls:slurm: Set prefix:%s",
|
||||
cur_prefix);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* setup the daemon info for each node */
vpid = 0;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_mapped_node_t* node = (orte_mapped_node_t*)item;

/* record the daemons info for this node */
dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->cell = node->node_cellid;
dmn->nodename = strdup(node->node_name);
if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(dmn->name), node->node_cellid, 0, vpid))) {
dmn->active_job = jobid;
dmn->cell = node->cell;
dmn->nodename = strdup(node->nodename);
if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(dmn->name), dmn->cell, 0, vpid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
opal_list_append(&daemons, &dmn->super);

vpid++;
}

/* store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}
|
||||
|
||||
@ -390,29 +367,6 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
|
||||
var = mca_base_param_environ_variable("seed", NULL, NULL);
|
||||
opal_setenv(var, "0", true, &env);
|
||||
|
||||
#if 0
|
||||
/* JMS What to do for sched_yield? */
|
||||
|
||||
/* set the progress engine schedule for this node. if node_slots
|
||||
is set to zero, then we default to NOT being oversubscribed */
|
||||
if (node->node_slots > 0 &&
|
||||
num_processes > node->node_slots) {
|
||||
if (mca_pls_slurm_component.debug) {
|
||||
opal_output(0, "pls:slurm: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
|
||||
node->node_slots, num_processes);
|
||||
}
|
||||
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
|
||||
opal_setenv(var, "1", true, &env);
|
||||
} else {
|
||||
if (mca_pls_slurm_component.debug) {
|
||||
opal_output(0, "pls:slurm: not oversubscribed -- setting mpi_yield_when_idle to 0");
|
||||
}
|
||||
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
|
||||
opal_setenv(var, "0", true, &env);
|
||||
}
|
||||
free(var);
|
||||
#endif
|
||||
|
||||
/* exec the daemon */
|
||||
rc = pls_slurm_start_proc(argc, argv, env, cur_prefix);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
@ -424,16 +378,6 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
|
||||
/* JMS: how do we catch when srun dies? */
|
||||
|
||||
cleanup:
|
||||
while (NULL != (item = opal_list_remove_first(&nodes))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
|
||||
while (NULL != (item = opal_list_remove_first(&mapping_list))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&mapping_list);
|
||||
|
||||
while (NULL != (item = opal_list_remove_first(&daemons))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
|
@ -58,12 +58,10 @@
|
||||
#include "orte/mca/smr/smr.h"
|
||||
#include "orte/mca/gpr/gpr.h"
|
||||
#include "orte/mca/sds/base/base.h"
|
||||
#include "orte/mca/rmaps/rmaps.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/ns/ns.h"
|
||||
|
||||
/* needs to be cleaned up for ORTE 2.0 */
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
|
||||
|
||||
#include "orte/mca/pls/base/pls_private.h"
|
||||
#include "pls_tm.h"
|
||||
@ -118,14 +116,16 @@ extern char **environ;
|
||||
|
||||
static int pls_tm_launch_job(orte_jobid_t jobid)
|
||||
{
|
||||
opal_list_t mapping;
|
||||
opal_list_item_t *m_item, *n_item;
|
||||
orte_job_map_t *map;
|
||||
opal_list_item_t *item;
|
||||
size_t num_nodes;
|
||||
orte_vpid_t vpid;
|
||||
int node_name_index;
|
||||
int proc_name_index;
|
||||
char *jobid_string;
|
||||
char *uri, *param;
|
||||
char **env;
|
||||
char *var;
|
||||
char **argv;
|
||||
int argc;
|
||||
int rc;
|
||||
@ -139,24 +139,17 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
|
||||
opal_list_t daemons;
|
||||
orte_pls_daemon_info_t *dmn;
|
||||
|
||||
/* Query the list of nodes allocated and mapped to this job.
|
||||
/* Query the map for this job.
|
||||
* We need the entire mapping for a couple of reasons:
|
||||
* - need the prefix to start with.
|
||||
* - need to know if we are launching on a subset of the allocated nodes
|
||||
*/
|
||||
OBJ_CONSTRUCT(&mapping, opal_list_t);
|
||||
rc = orte_rmaps_base_get_map(jobid, &mapping);
|
||||
rc = orte_rmaps.get_job_map(&map, jobid);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
num_nodes = 0;
|
||||
for(m_item = opal_list_get_first(&mapping);
|
||||
m_item != opal_list_get_end(&mapping);
|
||||
m_item = opal_list_get_next(m_item)) {
|
||||
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
|
||||
num_nodes += opal_list_get_size(&map->nodes);
|
||||
}
|
||||
num_nodes = opal_list_get_size(&map->nodes);
|
||||
|
||||
/*
|
||||
* Allocate a range of vpids for the daemons.
|
||||
@ -286,174 +279,139 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
|
||||
lib_base = opal_basename(OPAL_LIBDIR);
|
||||
bin_base = opal_basename(OPAL_BINDIR);
|
||||
|
||||
/*
|
||||
* iterate through each of the contexts
|
||||
*/
|
||||
for (m_item = opal_list_get_first(&mapping);
|
||||
m_item != opal_list_get_end(&mapping);
|
||||
m_item = opal_list_get_next(m_item)) {
|
||||
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
|
||||
char** env;
|
||||
char* var;
|
||||
|
||||
/* setup environment */
|
||||
env = opal_argv_copy(environ);
|
||||
var = mca_base_param_environ_variable("seed",NULL,NULL);
|
||||
opal_setenv(var, "0", true, &env);
|
||||
/* setup environment */
|
||||
env = opal_argv_copy(environ);
|
||||
var = mca_base_param_environ_variable("seed",NULL,NULL);
|
||||
opal_setenv(var, "0", true, &env);
|
||||
|
||||
/* If we have a prefix, then modify the PATH and
|
||||
LD_LIBRARY_PATH environment variables. */
|
||||
if (NULL != map->app->prefix_dir) {
|
||||
char *newenv;
|
||||
/* If we have a prefix, then modify the PATH and
|
||||
LD_LIBRARY_PATH environment variables. We only allow
|
||||
a single prefix to be specified. Since there will
|
||||
always be at least one app_context, we take it from
|
||||
there
|
||||
*/
|
||||
if (NULL != map->apps[0]->prefix_dir) {
|
||||
char *newenv;
|
||||
|
||||
for (i = 0; NULL != env && NULL != env[i]; ++i) {
|
||||
/* Reset PATH */
|
||||
if (0 == strncmp("PATH=", env[i], 5)) {
|
||||
asprintf(&newenv, "%s/%s:%s",
|
||||
map->apps[0]->prefix_dir, bin_base, env[i] + 5);
|
||||
if (mca_pls_tm_component.debug) {
|
||||
opal_output(0, "pls:tm: resetting PATH: %s",
|
||||
newenv);
|
||||
}
|
||||
opal_setenv("PATH", newenv, true, &env);
|
||||
free(newenv);
|
||||
}
|
||||
|
||||
for (i = 0; NULL != env && NULL != env[i]; ++i) {
|
||||
/* Reset PATH */
|
||||
if (0 == strncmp("PATH=", env[i], 5)) {
|
||||
asprintf(&newenv, "%s/%s:%s",
|
||||
map->app->prefix_dir, bin_base, env[i] + 5);
|
||||
if (mca_pls_tm_component.debug) {
|
||||
opal_output(0, "pls:tm: resetting PATH: %s",
|
||||
newenv);
|
||||
}
|
||||
opal_setenv("PATH", newenv, true, &env);
|
||||
free(newenv);
|
||||
}
|
||||
|
||||
/* Reset LD_LIBRARY_PATH */
|
||||
else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) {
|
||||
asprintf(&newenv, "%s/%s:%s",
|
||||
map->app->prefix_dir, lib_base, env[i] + 16);
|
||||
if (mca_pls_tm_component.debug) {
|
||||
opal_output(0, "pls:tm: resetting LD_LIBRARY_PATH: %s",
|
||||
newenv);
|
||||
}
|
||||
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
|
||||
free(newenv);
|
||||
}
|
||||
}
|
||||
/* Reset LD_LIBRARY_PATH */
|
||||
else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) {
|
||||
asprintf(&newenv, "%s/%s:%s",
|
||||
map->apps[0]->prefix_dir, lib_base, env[i] + 16);
|
||||
if (mca_pls_tm_component.debug) {
|
||||
opal_output(0, "pls:tm: resetting LD_LIBRARY_PATH: %s",
|
||||
newenv);
|
||||
}
|
||||
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
|
||||
free(newenv);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Do a quick sanity check to ensure that we can find the
|
||||
orted in the PATH */
|
||||
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = pls_tm_check_path(argv[0], env))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
opal_show_help("help-pls-tm.txt", "daemon-not-found",
|
||||
true, argv[0]);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Do a quick sanity check to ensure that we can find the
|
||||
orted in the PATH */
|
||||
/* Iterate through each of the nodes and spin
|
||||
* up a daemon.
|
||||
*/
|
||||
for (item = opal_list_get_first(&map->nodes);
|
||||
item != opal_list_get_end(&map->nodes);
|
||||
item = opal_list_get_next(n_item)) {
|
||||
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
|
||||
orte_process_name_t* name;
|
||||
char* name_string;
|
||||
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = pls_tm_check_path(argv[0], env))) {
|
||||
/* new daemon - setup to record its info */
|
||||
dmn = OBJ_NEW(orte_pls_daemon_info_t);
|
||||
dmn->active_job = jobid;
|
||||
opal_list_append(&daemons, &dmn->super);
|
||||
|
||||
/* setup node name */
|
||||
free(argv[node_name_index]);
|
||||
argv[node_name_index] = strdup(node->nodename);
|
||||
|
||||
/* record the node name in the daemon struct */
|
||||
dmn->cell = node->cell;
|
||||
dmn->nodename = strdup(node->nodename);
|
||||
|
||||
/* initialize daemons process name */
|
||||
rc = orte_ns.create_process_name(&name, node->cell, 0, vpid);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
opal_show_help("help-pls-tm.txt", "daemon-not-found",
|
||||
true, argv[0]);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Iterate through each of the nodes and spin
|
||||
* up a daemon.
|
||||
*/
|
||||
for (n_item = opal_list_get_first(&map->nodes);
|
||||
n_item != opal_list_get_end(&map->nodes);
|
||||
n_item = opal_list_get_next(n_item)) {
|
||||
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
|
||||
orte_ras_node_t* node = rmaps_node->node;
|
||||
orte_process_name_t* name;
|
||||
char* name_string;
|
||||
|
||||
/* already launched on this node */
|
||||
if (0 != node->node_launched++) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* new daemon - setup to record its info */
|
||||
dmn = OBJ_NEW(orte_pls_daemon_info_t);
|
||||
dmn->active_job = jobid;
|
||||
opal_list_append(&daemons, &dmn->super);
|
||||
|
||||
/* setup node name */
|
||||
free(argv[node_name_index]);
|
||||
argv[node_name_index] = strdup(node->node_name);
|
||||
|
||||
/* record the node name in the daemon struct */
|
||||
dmn->cell = node->node_cellid;
|
||||
dmn->nodename = strdup(node->node_name);
|
||||
|
||||
/* initialize daemons process name */
|
||||
rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* save it in the daemon struct */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* setup per-node options */
|
||||
if (mca_pls_tm_component.debug ||
|
||||
mca_pls_tm_component.verbose) {
|
||||
opal_output(0, "pls:tm: launching on node %s",
|
||||
node->node_name);
|
||||
}
|
||||
|
||||
/* setup process name */
|
||||
rc = orte_ns.get_proc_name_string(&name_string, name);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
opal_output(0, "pls:tm: unable to create process name");
|
||||
return rc;
|
||||
}
|
||||
free(argv[proc_name_index]);
|
||||
argv[proc_name_index] = strdup(name_string);
|
||||
|
||||
/* set the progress engine schedule for this node.
|
||||
* if node_slots is set to zero, then we default to
|
||||
* NOT being oversubscribed
|
||||
*/
|
||||
if (node->node_slots > 0 &&
|
||||
(orte_std_cntr_t)opal_list_get_size(&rmaps_node->node_procs) > node->node_slots) {
|
||||
if (mca_pls_tm_component.debug) {
|
||||
opal_output(0, "pls:tm: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
|
||||
node->node_slots,
|
||||
opal_list_get_size(&rmaps_node->node_procs));
|
||||
}
|
||||
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
|
||||
opal_setenv(var, "1", true, &env);
|
||||
} else {
|
||||
if (mca_pls_tm_component.debug) {
|
||||
opal_output(0, "pls:tm: not oversubscribed -- setting mpi_yield_when_idle to 0");
|
||||
}
|
||||
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
|
||||
opal_setenv(var, "0", true, &env);
|
||||
}
|
||||
free(var);
|
||||
|
||||
/* exec the daemon */
|
||||
if (mca_pls_tm_component.debug) {
|
||||
param = opal_argv_join(argv, ' ');
|
||||
if (NULL != param) {
|
||||
opal_output(0, "pls:tm: executing: %s", param);
|
||||
free(param);
|
||||
}
|
||||
}
|
||||
|
||||
rc = pls_tm_start_proc(node->node_name, argc, argv, env,
|
||||
tm_task_ids + launched,
|
||||
tm_events + launched);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
opal_output(0, "pls:tm: start_procs returned error %d", rc);
|
||||
goto cleanup;
|
||||
}
|
||||
launched++;
|
||||
++vpid;
|
||||
free(name);
|
||||
|
||||
/* Allow some progress to occur */
|
||||
opal_event_loop(OPAL_EVLOOP_NONBLOCK);
|
||||
/* save it in the daemon struct */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* setup per-node options */
|
||||
if (mca_pls_tm_component.debug ||
|
||||
mca_pls_tm_component.verbose) {
|
||||
opal_output(0, "pls:tm: launching on node %s",
|
||||
node->nodename);
|
||||
}
|
||||
|
||||
/* setup process name */
|
||||
rc = orte_ns.get_proc_name_string(&name_string, name);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
opal_output(0, "pls:tm: unable to create process name");
|
||||
return rc;
|
||||
}
|
||||
free(argv[proc_name_index]);
|
||||
argv[proc_name_index] = strdup(name_string);
|
||||
|
||||
/* exec the daemon */
|
||||
if (mca_pls_tm_component.debug) {
|
||||
param = opal_argv_join(argv, ' ');
|
||||
if (NULL != param) {
|
||||
opal_output(0, "pls:tm: executing: %s", param);
|
||||
free(param);
|
||||
}
|
||||
}
|
||||
|
||||
rc = pls_tm_start_proc(node->nodename, argc, argv, env,
|
||||
tm_task_ids + launched,
|
||||
tm_events + launched);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
opal_output(0, "pls:tm: start_procs returned error %d", rc);
|
||||
goto cleanup;
|
||||
}
|
||||
launched++;
|
||||
++vpid;
|
||||
free(name);
|
||||
|
||||
/* Allow some progress to occur */
|
||||
opal_event_loop(OPAL_EVLOOP_NONBLOCK);
|
||||
}
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm:launch: finished spawning orteds\n");
}

/* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}

@ -478,10 +436,6 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
free(tm_task_ids);
}

while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
if (NULL != lib_base) {
free(lib_base);
}
@ -490,8 +444,8 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
}

/* deconstruct the daemon list */
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);
while (NULL != (item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&daemons);

@ -30,7 +30,7 @@
#import "orte/mca/pls/pls.h"
#import "orte/mca/errmgr/errmgr.h"
#import "orte/mca/ras/ras_types.h"
#import "orte/mca/rmaps/base/rmaps_private.h"
#import "orte/mca/rmaps/rmaps.h"
#import "orte/mca/smr/smr.h"

#import "pls_xgrid_client.h"
@ -229,8 +229,8 @@ char **environ;

-(int) launchJob:(orte_jobid_t) jobid
{
opal_list_t mapping;
opal_list_item_t *m_item, *n_item;
orte_job_map_t *map;
opal_list_item_t *item;
size_t num_nodes;
orte_vpid_t vpid;
int rc, i = 0;
@ -239,24 +239,17 @@ char **environ;
char *orted_path;
char *nsuri = NULL, *gpruri = NULL;

/* Query the list of nodes allocated and mapped to this job.
/* Query the map for this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
*/
OBJ_CONSTRUCT(&mapping, opal_list_t);
rc = orte_rmaps_base_get_map(jobid, &mapping);
rc = orte_rmaps.get_job_map(&map, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}

num_nodes = 0;
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
num_nodes = opal_list_get_size(&map->nodes);

/*
|
||||
* Allocate a range of vpids for the daemons.
|
||||
@ -300,84 +293,69 @@ char **environ;
|
||||
/* build up the array of task specifications */
|
||||
NSMutableDictionary *taskSpecifications = [NSMutableDictionary dictionary];
|
||||
|
||||
/*
|
||||
* iterate through each of the contexts
|
||||
/* Iterate through each of the nodes and spin
|
||||
* up a daemon.
|
||||
*/
|
||||
for (m_item = opal_list_get_first(&mapping);
|
||||
m_item != opal_list_get_end(&mapping);
|
||||
m_item = opal_list_get_next(m_item)) {
|
||||
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
|
||||
for (item = opal_list_get_first(&map->nodes);
|
||||
item != opal_list_get_end(&map->nodes);
|
||||
item = opal_list_get_next(n_item)) {
|
||||
orte_mapped_node_t* rmaps_node = (orte_mapped_node_t*)item;
|
||||
orte_process_name_t* name;
|
||||
char* name_string;
|
||||
|
||||
/* new daemon - setup to record its info */
|
||||
dmn = OBJ_NEW(orte_pls_daemon_info_t);
|
||||
dmn->active_job = jobid;
|
||||
opal_list_append(&daemons, &dmn->super);
|
||||
|
||||
/* record the node name in the daemon struct */
|
||||
dmn->cell = node->cell;
|
||||
dmn->nodename = strdup(node->nodename);
|
||||
|
||||
/* initialize daemons process name */
|
||||
rc = orte_ns.create_process_name(&name, node->cell, 0, vpid);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* save it in the daemon struct */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* setup per-node options */
|
||||
opal_output_verbose(1, orte_pls_base.pls_output,
|
||||
"orte:pls:xgrid: launching on node %s",
|
||||
node->nodename);
|
||||
|
||||
/* setup process name */
|
||||
rc = orte_ns.get_proc_name_string(&name_string, name);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
opal_output(orte_pls_base.pls_output,
|
||||
"orte:pls:xgrid: unable to create process name");
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Iterate through each of the nodes and spin
|
||||
* up a daemon.
|
||||
*/
|
||||
for (n_item = opal_list_get_first(&map->nodes);
|
||||
n_item != opal_list_get_end(&map->nodes);
|
||||
n_item = opal_list_get_next(n_item)) {
|
||||
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
|
||||
orte_ras_node_t* node = rmaps_node->node;
|
||||
orte_process_name_t* name;
|
||||
char* name_string;
|
||||
|
||||
/* already launched on this node */
|
||||
if (0 != node->node_launched++) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* new daemon - setup to record its info */
|
||||
dmn = OBJ_NEW(orte_pls_daemon_info_t);
|
||||
dmn->active_job = jobid;
|
||||
opal_list_append(&daemons, &dmn->super);
|
||||
|
||||
/* record the node name in the daemon struct */
|
||||
dmn->cell = node->node_cellid;
|
||||
dmn->nodename = strdup(node->node_name);
|
||||
|
||||
/* initialize daemons process name */
|
||||
rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* save it in the daemon struct */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* setup per-node options */
|
||||
opal_output_verbose(1, orte_pls_base.pls_output,
|
||||
"orte:pls:xgrid: launching on node %s",
|
||||
node->node_name);
|
||||
|
||||
/* setup process name */
|
||||
rc = orte_ns.get_proc_name_string(&name_string, name);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
opal_output(orte_pls_base.pls_output,
|
||||
"orte:pls:xgrid: unable to create process name");
|
||||
return rc;
|
||||
}
|
||||
|
||||
NSMutableDictionary *task = [NSMutableDictionary dictionary];
|
||||
[task setObject: [NSString stringWithCString: orted_path]
|
||||
forKey: XGJobSpecificationCommandKey];
|
||||
NSArray *taskArguments =
|
||||
[NSArray arrayWithObjects: @"--no-daemonize",
|
||||
@"--bootproxy", [NSString stringWithFormat: @"%d", jobid],
|
||||
@"--name", [NSString stringWithCString: name_string],
|
||||
@"--num_procs", [NSString stringWithFormat: @"%d", 1],
|
||||
@"--nodename", [NSString stringWithCString: node->node_name],
|
||||
@"--nsreplica", [NSString stringWithCString: nsuri],
|
||||
@"--gprreplica", [NSString stringWithCString: gpruri],
|
||||
nil];
|
||||
[task setObject: taskArguments forKey: XGJobSpecificationArgumentsKey];
|
||||
|
||||
[taskSpecifications setObject: task
|
||||
forKey: [NSString stringWithFormat: @"%d", i]];
|
||||
|
||||
vpid++; i++;
|
||||
}
|
||||
NSMutableDictionary *task = [NSMutableDictionary dictionary];
|
||||
[task setObject: [NSString stringWithCString: orted_path]
|
||||
forKey: XGJobSpecificationCommandKey];
|
||||
NSArray *taskArguments =
|
||||
[NSArray arrayWithObjects: @"--no-daemonize",
|
||||
@"--bootproxy", [NSString stringWithFormat: @"%d", jobid],
|
||||
@"--name", [NSString stringWithCString: name_string],
|
||||
@"--num_procs", [NSString stringWithFormat: @"%d", 1],
|
||||
@"--nodename", [NSString stringWithCString: node->nodename],
|
||||
@"--nsreplica", [NSString stringWithCString: nsuri],
|
||||
@"--gprreplica", [NSString stringWithCString: gpruri],
|
||||
nil];
|
||||
[task setObject: taskArguments forKey: XGJobSpecificationArgumentsKey];
|
||||
|
||||
[taskSpecifications setObject: task
|
||||
forKey: [NSString stringWithFormat: @"%d", i]];
|
||||
|
||||
vpid++; i++;
|
||||
}
|
||||
|
||||
/* job specification */
|
||||
@ -419,7 +397,7 @@ char **environ;
forKey: [NSString stringWithFormat: @"%d", jobid]];

/* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) {
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc);
}

@ -427,11 +405,6 @@ cleanup:
if (NULL != nsuri) free(nsuri);
if (NULL != gpruri) free(gpruri);

while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);

/* deconstruct the daemon list */
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);
@ -20,14 +20,14 @@ dist_pkgdata_DATA = base/help-orte-rmaps-base.txt

headers += \
base/base.h \
base/rmaps_class_instances.h \
base/rmaps_private.h

libmca_rmaps_la_SOURCES += \
base/rmaps_base_close.c \
base/rmaps_base_map.c \
base/rmaps_base_registry_fns.c \
base/rmaps_base_map_job.c \
base/rmaps_base_node.c \
base/rmaps_base_no_ops.c \
base/rmaps_base_support_fns.c \
base/rmaps_base_open.c \
base/rmaps_base_receive.c \
base/rmaps_base_find_avail.c \
@ -29,14 +29,14 @@
/*
* JOB_MAP
*/
int orte_rmaps_base_compare_map(orte_rmaps_base_map_t *value1, orte_rmaps_base_map_t *value2, orte_data_type_t type)
int orte_rmaps_base_compare_map(orte_job_map_t *value1, orte_job_map_t *value2, orte_data_type_t type)
{
return ORTE_EQUAL;
}


/* MAPPED_PROC */
int orte_rmaps_base_compare_mapped_proc(orte_rmaps_base_proc_t *value1, orte_rmaps_base_proc_t *value2, orte_data_type_t type)
int orte_rmaps_base_compare_mapped_proc(orte_mapped_proc_t *value1, orte_mapped_proc_t *value2, orte_data_type_t type)
{
return ORTE_EQUAL;
}
@ -44,7 +44,7 @@ int orte_rmaps_base_compare_mapped_proc(orte_rmaps_base_proc_t *value1, orte_rma


/* MAPPED_NODE */
int orte_rmaps_base_compare_mapped_node(orte_rmaps_base_node_t *value1, orte_rmaps_base_node_t *value2, orte_data_type_t type)
int orte_rmaps_base_compare_mapped_node(orte_mapped_node_t *value1, orte_mapped_node_t *value2, orte_data_type_t type)
{
return ORTE_EQUAL;
}
@ -34,12 +34,12 @@
|
||||
/*
|
||||
* JOB_MAP
|
||||
*/
|
||||
int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t *src, orte_data_type_t type)
|
||||
int orte_rmaps_base_copy_map(orte_job_map_t **dest, orte_job_map_t *src, orte_data_type_t type)
|
||||
{
|
||||
orte_std_cntr_t i;
|
||||
int rc;
|
||||
opal_list_item_t *item;
|
||||
orte_rmaps_base_node_t *srcnode, *nodeptr;
|
||||
orte_mapped_node_t *srcnode, *nodeptr;
|
||||
|
||||
if (NULL == src) {
|
||||
*dest = NULL;
|
||||
@ -47,34 +47,34 @@ int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t
|
||||
}
|
||||
|
||||
/* create the new object */
|
||||
*dest = OBJ_NEW(orte_rmaps_base_map_t);
|
||||
*dest = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == *dest) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* copy data into it */
|
||||
(*dest)->app = src->app;
|
||||
(*dest)->job = src->job;
|
||||
(*dest)->num_apps = src->num_apps;
|
||||
|
||||
(*dest)->procs = (orte_rmaps_base_proc_t**)malloc(src->num_procs * sizeof(orte_rmaps_base_proc_t));
|
||||
if (NULL == (*dest)->procs) {
|
||||
(*dest)->apps = (orte_app_context_t**)malloc(src->num_apps * sizeof(orte_app_context_t*));
|
||||
if (NULL == (*dest)->apps) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
OBJ_RELEASE(*dest);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
for (i=0; i < src->num_procs; i++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_proc(&((*dest)->procs[i]), src->procs[i], ORTE_MAPPED_PROC))) {
|
||||
for (i=0; i < src->num_apps; i++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((*dest)->apps[i]), src->apps[i], ORTE_APP_CONTEXT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(*dest);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
(*dest)->num_procs = src->num_procs;
|
||||
|
||||
for (item = opal_list_get_first(&(src->nodes));
|
||||
item != opal_list_get_end(&(src->nodes));
|
||||
item = opal_list_get_next(item)) {
|
||||
srcnode = (orte_rmaps_base_node_t*)item;
|
||||
srcnode = (orte_mapped_node_t*)item;
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_node(&nodeptr, srcnode, ORTE_MAPPED_NODE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(*dest);
|
||||
@ -89,52 +89,40 @@ int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t
|
||||
/*
|
||||
* MAPPED_PROC
|
||||
*/
|
||||
int orte_rmaps_base_copy_mapped_proc(orte_rmaps_base_proc_t **dest, orte_rmaps_base_proc_t *src, orte_data_type_t type)
|
||||
int orte_rmaps_base_copy_mapped_proc(orte_mapped_proc_t **dest, orte_mapped_proc_t *src, orte_data_type_t type)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (NULL == src) {
|
||||
*dest = NULL;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* create the new object */
|
||||
*dest = OBJ_NEW(orte_rmaps_base_proc_t);
|
||||
*dest = OBJ_NEW(orte_mapped_proc_t);
|
||||
if (NULL == *dest) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* copy data into it */
|
||||
if (NULL != src->app) {
|
||||
(*dest)->app = strdup(src->app);
|
||||
}
|
||||
(*dest)->name = src->name;
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_node(&((*dest)->proc_node), src->proc_node, ORTE_MAPPED_NODE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(*dest);
|
||||
return rc;
|
||||
}
|
||||
(*dest)->rank = src->rank;
|
||||
|
||||
(*dest)->proc_name = src->proc_name;
|
||||
|
||||
(*dest)->proc_rank = src->proc_rank;
|
||||
(*dest)->app_idx = src->app_idx;
|
||||
|
||||
(*dest)->pid = src->pid;
|
||||
|
||||
(*dest)->local_pid = src->local_pid;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* MAPPED_NODE
|
||||
*/
|
||||
int orte_rmaps_base_copy_mapped_node(orte_rmaps_base_node_t **dest, orte_rmaps_base_node_t *src, orte_data_type_t type)
|
||||
int orte_rmaps_base_copy_mapped_node(orte_mapped_node_t **dest, orte_mapped_node_t *src, orte_data_type_t type)
|
||||
{
|
||||
int rc;
|
||||
opal_list_item_t *item;
|
||||
orte_rmaps_base_proc_t *srcproc, *procptr;
|
||||
orte_mapped_proc_t *srcproc, *procptr;
|
||||
|
||||
if (NULL == src) {
|
||||
*dest = NULL;
|
||||
@ -142,29 +130,43 @@ int orte_rmaps_base_copy_mapped_node(orte_rmaps_base_node_t **dest, orte_rmaps_b
|
||||
}
|
||||
|
||||
/* create the new object */
|
||||
*dest = OBJ_NEW(orte_rmaps_base_node_t);
|
||||
*dest = OBJ_NEW(orte_mapped_node_t);
|
||||
if (NULL == *dest) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* copy data into it */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((*dest)->node), src->node, ORTE_RAS_NODE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(*dest);
|
||||
return rc;
|
||||
(*dest)->cell = src->cell;
|
||||
|
||||
if (NULL != src->nodename) {
|
||||
(*dest)->nodename = strdup(src->nodename);
|
||||
}
|
||||
|
||||
if (NULL != src->username) {
|
||||
(*dest)->username = strdup(src->username);
|
||||
}
|
||||
|
||||
for (item = opal_list_get_first(&(src->node_procs));
|
||||
item != opal_list_get_end(&(src->node_procs));
|
||||
if (NULL != src->daemon) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((*dest)->daemon), src->daemon, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(*dest);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
(*dest)->oversubscribed = src->oversubscribed;
|
||||
|
||||
for (item = opal_list_get_first(&(src->procs));
|
||||
item != opal_list_get_end(&(src->procs));
|
||||
item = opal_list_get_next(item)) {
|
||||
srcproc = (orte_rmaps_base_proc_t*)item;
|
||||
srcproc = (orte_mapped_proc_t*)item;
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_proc(&procptr, srcproc, ORTE_MAPPED_PROC))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(*dest);
|
||||
return rc;
|
||||
}
|
||||
opal_list_append(&((*dest)->node_procs), &procptr->super);
|
||||
opal_list_append(&((*dest)->procs), &procptr->super);
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -34,33 +34,32 @@
|
||||
* JOB_MAP
|
||||
*/
|
||||
int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
|
||||
orte_std_cntr_t num_vals, orte_data_type_t type)
|
||||
orte_std_cntr_t num_vals, orte_data_type_t type)
|
||||
{
|
||||
int rc;
|
||||
orte_std_cntr_t i, num_nodes;
|
||||
orte_rmaps_base_map_t **maps;
|
||||
orte_job_map_t **maps;
|
||||
opal_list_item_t *item;
|
||||
orte_rmaps_base_node_t *srcnode;
|
||||
orte_mapped_node_t *srcnode;
|
||||
|
||||
/* array of pointers to orte_rmaps_base_map_t objects - need to pack the objects a set of fields at a time */
|
||||
maps = (orte_rmaps_base_map_t**) src;
|
||||
/* array of pointers to orte_job_map_t objects - need to pack the objects a set of fields at a time */
|
||||
maps = (orte_job_map_t**) src;
|
||||
|
||||
for (i=0; i < num_vals; i++) {
|
||||
/* pack the app_context */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, maps[i]->app, 1, ORTE_APP_CONTEXT))) {
|
||||
/* pack the jobid this map is for */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->job), 1, ORTE_JOBID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the number of procs */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->num_procs), 1, ORTE_STD_CNTR))) {
|
||||
/* pack the number of app_contexts */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->num_apps), 1, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the procs array */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(maps[i]->procs),
|
||||
maps[i]->num_procs, ORTE_MAPPED_PROC))) {
|
||||
/* pack the app_contexts */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, maps[i]->apps, maps[i]->num_apps, ORTE_APP_CONTEXT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
@ -77,7 +76,7 @@ int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
|
||||
for (item = opal_list_get_first(&(maps[i]->nodes));
|
||||
item != opal_list_get_end(&(maps[i]->nodes));
|
||||
item = opal_list_get_next(item)) {
|
||||
srcnode = (orte_rmaps_base_node_t*)item;
|
||||
srcnode = (orte_mapped_node_t*)item;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)srcnode,
|
||||
1, ORTE_MAPPED_NODE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -99,45 +98,33 @@ int orte_rmaps_base_pack_mapped_proc(orte_buffer_t *buffer, void *src,
|
||||
{
|
||||
int rc;
|
||||
orte_std_cntr_t i;
|
||||
orte_rmaps_base_proc_t **procs;
|
||||
orte_mapped_proc_t **procs;
|
||||
|
||||
/* array of pointers to orte_rmaps_base_proc_t objects - need to pack the objects a set of fields at a time */
|
||||
procs = (orte_rmaps_base_proc_t**) src;
|
||||
/* array of pointers to orte_mapped_proc_t objects - need to pack the objects a set of fields at a time */
|
||||
procs = (orte_mapped_proc_t**) src;
|
||||
|
||||
for (i=0; i < num_vals; i++) {
|
||||
/* pack the app */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, procs[i]->app, 1, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the proc_node */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, procs[i]->proc_node, 1, ORTE_MAPPED_NODE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the proc name */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(&(procs[i]->proc_name)),
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(&(procs[i]->name)),
|
||||
1, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the rank */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->proc_rank), 1, ORTE_STD_CNTR))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->rank), 1, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the pls-pid */
|
||||
/* pack the pid */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->pid), 1, ORTE_PID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the local pid */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->local_pid), 1, ORTE_PID))) {
|
||||
/* pack the app_idx */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->app_idx), 1, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
@ -155,22 +142,46 @@ int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
|
||||
{
|
||||
int rc;
|
||||
orte_std_cntr_t i, num_procs;
|
||||
orte_rmaps_base_node_t **nodes;
|
||||
orte_mapped_node_t **nodes;
|
||||
opal_list_item_t *item;
|
||||
orte_rmaps_base_proc_t *srcproc;
|
||||
orte_mapped_proc_t *srcproc;
|
||||
|
||||
/* array of pointers to orte_rmaps_base_node_t objects - need to pack the objects a set of fields at a time */
|
||||
nodes = (orte_rmaps_base_node_t**) src;
|
||||
/* array of pointers to orte_mapped_node_t objects - need to pack the objects a set of fields at a time */
|
||||
nodes = (orte_mapped_node_t**) src;
|
||||
|
||||
for (i=0; i < num_vals; i++) {
|
||||
/* pack the node object */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, nodes[i]->node, 1, ORTE_RAS_NODE))) {
|
||||
/* pack the cellid */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->cell), 1, ORTE_CELLID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the nodename */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->nodename), 1, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the username */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->username), 1, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the daemon's name */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->daemon), 1, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the oversubscribed flag */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->oversubscribed), 1, ORTE_BOOL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the number of procs */
|
||||
num_procs = (orte_std_cntr_t)opal_list_get_size(&(nodes[i]->node_procs));
|
||||
num_procs = (orte_std_cntr_t)opal_list_get_size(&(nodes[i]->procs));
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &num_procs, 1, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
@ -178,10 +189,10 @@ int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
|
||||
|
||||
/* pack the procs list */
|
||||
if (0 < num_procs) {
|
||||
for (item = opal_list_get_first(&(nodes[i]->node_procs));
|
||||
item != opal_list_get_end(&(nodes[i]->node_procs));
|
||||
for (item = opal_list_get_first(&(nodes[i]->procs));
|
||||
item != opal_list_get_end(&(nodes[i]->procs));
|
||||
item = opal_list_get_next(item)) {
|
||||
srcproc = (orte_rmaps_base_proc_t*)item;
|
||||
srcproc = (orte_mapped_proc_t*)item;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)srcproc,
|
||||
1, ORTE_MAPPED_PROC))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -34,10 +34,10 @@
/*
* JOB_MAP
*/
int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t *src, orte_data_type_t type)
int orte_rmaps_base_print_map(char **output, char *prefix, orte_job_map_t *src, orte_data_type_t type)
{
char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
orte_rmaps_base_node_t *srcnode;
orte_mapped_node_t *srcnode;
orte_std_cntr_t i, num_nodes;
opal_list_item_t *item;
int rc;
@ -52,32 +52,22 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t
asprintf(&pfx2, "%s", prefix);
}

asprintf(&tmp, "%sMap for app_context:", pfx2);
asprintf(&tmp, "%sMap for job: %ld\tNum app_contexts: %ld", pfx2, (long)src->job, (long)src->num_apps);

asprintf(&pfx, "%s\t", pfx2);
free(pfx2);

if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->app, ORTE_APP_CONTEXT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(pfx);
|
||||
free(tmp);
|
||||
return rc;
|
||||
}
|
||||
asprintf(&tmp3, "%s\n%s\n%sNum elements in procs array: %ld", tmp, tmp2, pfx, (long)src->num_procs);
|
||||
free(tmp);
|
||||
free(tmp2);
|
||||
|
||||
for (i=0; i < src->num_procs; i++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_proc(&tmp, pfx, src->procs[i], ORTE_MAPPED_PROC))) {
|
||||
|
||||
for (i=0; i < src->num_apps; i++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->apps[i], ORTE_APP_CONTEXT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(pfx);
|
||||
free(tmp3);
|
||||
free(tmp);
|
||||
return rc;
|
||||
}
|
||||
asprintf(&tmp2, "%s\n%s", tmp3, tmp);
|
||||
asprintf(&tmp3, "%s\n%s", tmp, tmp2);
|
||||
free(tmp);
|
||||
free(tmp3);
|
||||
tmp3 = tmp2;
|
||||
free(tmp2);
|
||||
tmp = tmp3;
|
||||
}
|
||||
|
||||
num_nodes = (orte_std_cntr_t)opal_list_get_size(&(src->nodes));
|
||||
@ -86,7 +76,7 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t
|
||||
for (item = opal_list_get_first(&(src->nodes));
|
||||
item != opal_list_get_end(&(src->nodes));
|
||||
item = opal_list_get_next(item)) {
|
||||
srcnode = (orte_rmaps_base_node_t*)item;
|
||||
srcnode = (orte_mapped_node_t*)item;
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_node(&tmp2, pfx, srcnode, ORTE_MAPPED_NODE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(pfx);
|
||||
@ -110,7 +100,7 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t
|
||||
/*
|
||||
* MAPPED_PROC
|
||||
*/
|
||||
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_base_proc_t *src, orte_data_type_t type)
|
||||
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_mapped_proc_t *src, orte_data_type_t type)
|
||||
{
|
||||
char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
|
||||
int rc;
|
||||
@ -125,35 +115,18 @@ int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_ba
|
||||
asprintf(&pfx2, "%s", prefix);
|
||||
}
|
||||
|
||||
asprintf(&tmp, "%sMapped proc:", pfx2);
|
||||
asprintf(&tmp3, "%sMapped proc:\n%s\tProc Name:", pfx2, pfx2);
|
||||
|
||||
asprintf(&pfx, "%s\t", pfx2);
|
||||
|
||||
if (NULL != src->app) {
|
||||
asprintf(&tmp2, "%s\n%sApp name: %s", tmp, pfx, src->app);
|
||||
} else {
|
||||
asprintf(&tmp2, "%s\n%sApplication has NULL name", tmp, pfx);
|
||||
}
|
||||
free(tmp);
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_node(&tmp, pfx, src->proc_node, ORTE_MAPPED_NODE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(pfx);
|
||||
free(tmp2);
|
||||
return rc;
|
||||
}
|
||||
asprintf(&tmp3, "%s\n%s\n%s\n%sProc Name:", tmp2, pfx, tmp, pfx);
|
||||
free(tmp2);
|
||||
free(tmp);
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, &(src->proc_name), ORTE_NAME))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, &(src->name), ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(pfx);
|
||||
free(tmp3);
|
||||
return rc;
|
||||
}
|
||||
asprintf(&tmp, "%s\n%s\n%sProc Rank: %ld\tPLS pid: %ld\tLocal PID: %ld\n", tmp3, tmp2, pfx,
|
||||
(long)src->proc_rank, (long)src->pid, (long)src->local_pid);
|
||||
asprintf(&tmp, "%s\n%s\n%sProc Rank: %ld\tProc PID: %ld\tApp_context index: %ld\n", tmp3, tmp2, pfx,
|
||||
(long)src->rank, (long)src->pid, (long)src->app_idx);
|
||||
free(tmp2);
|
||||
free(tmp3);
|
||||
|
||||
@ -168,15 +141,13 @@ int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_ba
|
||||
/*
|
||||
* MAPPED_NODE
|
||||
*/
|
||||
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_base_node_t *src, orte_data_type_t type)
|
||||
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_node_t *src, orte_data_type_t type)
|
||||
{
|
||||
int rc;
|
||||
char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
|
||||
orte_std_cntr_t num_procs;
|
||||
#if 0
|
||||
opal_list_item_t *item;
|
||||
orte_rmaps_base_proc_t *srcproc;
|
||||
#endif
|
||||
orte_mapped_proc_t *srcproc;
|
||||
|
||||
/* set default result */
|
||||
*output = NULL;
|
||||
@ -187,28 +158,31 @@ int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_ba
|
||||
} else {
|
||||
asprintf(&pfx2, "%s", prefix);
|
||||
}
|
||||
|
||||
asprintf(&tmp, "%sMapped node:", pfx2);
|
||||
|
||||
asprintf(&tmp, "%sMapped node:\n%s\tCell: %ld\tNodename: %s\tUsername: %s\n%s\tDaemon name:", pfx2, pfx2,
|
||||
(long)src->cell, (NULL == src->nodename ? "NULL" : src->nodename),
|
||||
(NULL == src->username ? "NULL" : src->username), pfx2);
|
||||
|
||||
asprintf(&pfx, "%s\t", pfx2);
|
||||
free(pfx2);
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->node, ORTE_RAS_NODE))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->daemon, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(pfx);
|
||||
free(tmp);
|
||||
return rc;
|
||||
}
|
||||
|
||||
num_procs = (orte_std_cntr_t)opal_list_get_size(&(src->node_procs));
|
||||
asprintf(&tmp3, "%s\n%s\n%sNum elements in procs list: %ld", tmp, tmp2, pfx, (long)num_procs);
|
||||
num_procs = (orte_std_cntr_t)opal_list_get_size(&(src->procs));
|
||||
asprintf(&tmp3, "%s\n\t%s\n%sOversubscribed: %s\tNum elements in procs list: %ld", tmp, tmp2, pfx,
|
||||
(src->oversubscribed ? "True" : "False"), (long)num_procs);
|
||||
free(tmp);
|
||||
free(tmp2);
|
||||
#if 0
|
||||
for (item = opal_list_get_first(&(src->node_procs));
|
||||
item != opal_list_get_end(&(src->node_procs));
|
||||
|
||||
for (item = opal_list_get_first(&(src->procs));
|
||||
item != opal_list_get_end(&(src->procs));
|
||||
item = opal_list_get_next(item)) {
|
||||
srcproc = (orte_rmaps_base_proc_t*)item;
|
||||
srcproc = (orte_mapped_proc_t*)item;
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_proc(&tmp2, pfx, srcproc, ORTE_MAPPED_PROC))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(pfx);
|
||||
@ -220,7 +194,7 @@ int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_ba
|
||||
free(tmp2);
|
||||
tmp3 = tmp;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* set the return */
|
||||
*output = tmp3;
|
||||
|
||||
|
@ -32,10 +32,10 @@
/*
* JOB_MAP
*/
int orte_rmaps_base_size_map(size_t *size, orte_rmaps_base_map_t *src, orte_data_type_t type)
int orte_rmaps_base_size_map(size_t *size, orte_job_map_t *src, orte_data_type_t type)
{
/* account for the object itself */
*size = sizeof(orte_rmaps_base_map_t);
*size = sizeof(orte_job_map_t);

/* if src is NULL, then that's all we wanted */
if (NULL == src) return ORTE_SUCCESS;
@ -46,10 +46,10 @@ int orte_rmaps_base_size_map(size_t *size, orte_rmaps_base_map_t *src, orte_data
/*
* MAPPED_PROC
*/
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_rmaps_base_proc_t *src, orte_data_type_t type)
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_mapped_proc_t *src, orte_data_type_t type)
{
/* account for the object itself */
*size = sizeof(orte_rmaps_base_proc_t);
*size = sizeof(orte_mapped_proc_t);

/* if src is NULL, then that's all we wanted */
if (NULL == src) return ORTE_SUCCESS;
@ -60,10 +60,10 @@ int orte_rmaps_base_size_mapped_proc(size_t *size, orte_rmaps_base_proc_t *src,
/*
* MAPPED_NODE
*/
int orte_rmaps_base_size_mapped_node(size_t *size, orte_rmaps_base_node_t *src, orte_data_type_t type)
int orte_rmaps_base_size_mapped_node(size_t *size, orte_mapped_node_t *src, orte_data_type_t type)
{
/* account for the object itself */
*size = sizeof(orte_rmaps_base_node_t);
*size = sizeof(orte_mapped_node_t);

/* if src is NULL, then that's all we wanted */
if (NULL == src) return ORTE_SUCCESS;
@ -40,50 +40,49 @@ int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest,
|
||||
{
|
||||
int rc;
|
||||
orte_std_cntr_t i, j, n, num_nodes;
|
||||
orte_rmaps_base_map_t **maps;
|
||||
orte_rmaps_base_node_t *node;
|
||||
orte_job_map_t **maps;
|
||||
orte_mapped_node_t *node;
|
||||
|
||||
/* unpack into array of orte_rmaps_base_map_t objects */
|
||||
maps = (orte_rmaps_base_map_t**) dest;
|
||||
/* unpack into array of orte_job_map_t objects */
|
||||
maps = (orte_job_map_t**) dest;
|
||||
for (i=0; i < *num_vals; i++) {
|
||||
|
||||
/* create the orte_rmaps_base_map_t object */
|
||||
maps[i] = OBJ_NEW(orte_rmaps_base_map_t);
|
||||
maps[i] = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == maps[i]) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* unpack the jobid */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(maps[i]->job), &n, ORTE_JOBID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the number of app_contexts */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(maps[i]->num_apps), &n, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* allocate space for them */
|
||||
maps[i]->apps = (orte_app_context_t**)malloc(maps[i]->num_apps * sizeof(orte_app_context_t*));
|
||||
if (NULL == maps[i]->apps) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
/* unpack the app_context */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(maps[i]->app), &n, ORTE_APP_CONTEXT))) {
|
||||
&(maps[i]->apps), &(maps[i]->num_apps), ORTE_APP_CONTEXT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the number of procs */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(maps[i]->num_procs), &n, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* if we have some, allocate space for them */
|
||||
if (0 < maps[i]->num_procs) {
|
||||
maps[i]->procs = (orte_rmaps_base_proc_t**)malloc(maps[i]->num_procs * sizeof(orte_rmaps_base_proc_t*));
|
||||
if (NULL == maps[i]->procs) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
/* and unpack them */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, maps[i]->procs, &(maps[i]->num_procs), ORTE_MAPPED_PROC))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* unpack the number of nodes */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &num_nodes, &n, ORTE_STD_CNTR))) {
|
||||
@ -112,39 +111,23 @@ int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
|
||||
{
|
||||
int rc;
|
||||
orte_std_cntr_t i, n;
|
||||
orte_rmaps_base_proc_t **procs;
|
||||
orte_mapped_proc_t **procs;
|
||||
|
||||
/* unpack into array of orte_rmaps_base_proc_t objects */
|
||||
procs = (orte_rmaps_base_proc_t**) dest;
|
||||
/* unpack into array of orte_mapped_proc_t objects */
|
||||
procs = (orte_mapped_proc_t**) dest;
|
||||
for (i=0; i < *num_vals; i++) {
|
||||
|
||||
/* create the orte_rmaps_base_proc_t object */
|
||||
procs[i] = OBJ_NEW(orte_rmaps_base_proc_t);
|
||||
/* create the orte_mapped_proc_t object */
|
||||
procs[i] = OBJ_NEW(orte_mapped_proc_t);
|
||||
if (NULL == procs[i]) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* unpack the app name */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(procs[i]->app), &n, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the proc_node */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(procs[i]->proc_node), &n, ORTE_MAPPED_NODE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the proc name */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(procs[i]->proc_name), &n, ORTE_NAME))) {
|
||||
&(procs[i]->name), &n, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
@ -152,12 +135,12 @@ int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
|
||||
/* unpack the rank */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(procs[i]->proc_rank), &n, ORTE_STD_CNTR))) {
|
||||
&(procs[i]->rank), &n, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the pls-pid */
|
||||
/* unpack the pid */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(procs[i]->pid), &n, ORTE_PID))) {
|
||||
@ -165,10 +148,10 @@ int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the local pid */
|
||||
/* unpack the app_idx */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(procs[i]->local_pid), &n, ORTE_PID))) {
|
||||
&(procs[i]->app_idx), &n, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
@ -185,24 +168,56 @@ int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
|
||||
{
|
||||
int rc;
|
||||
orte_std_cntr_t i, j, n, num_procs;
|
||||
orte_rmaps_base_node_t **nodes;
|
||||
orte_rmaps_base_proc_t *srcproc;
|
||||
orte_mapped_node_t **nodes;
|
||||
orte_mapped_proc_t *srcproc;
|
||||
|
||||
/* unpack into array of orte_rmaps_base_node_t objects */
|
||||
nodes = (orte_rmaps_base_node_t**) dest;
|
||||
/* unpack into array of orte_mapped_node_t objects */
|
||||
nodes = (orte_mapped_node_t**) dest;
|
||||
for (i=0; i < *num_vals; i++) {
|
||||
|
||||
/* create the orte_rmaps_base_node_t object */
|
||||
nodes[i] = OBJ_NEW(orte_rmaps_base_node_t);
|
||||
nodes[i] = OBJ_NEW(orte_mapped_node_t);
|
||||
if (NULL == nodes[i]) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* unpack the node object */
|
||||
/* unpack the cellid */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(nodes[i]->node), &n, ORTE_RAS_NODE))) {
|
||||
&(nodes[i]->cell), &n, ORTE_CELLID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the nodename */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(nodes[i]->nodename), &n, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the username */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(nodes[i]->username), &n, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the daemon's name */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(nodes[i]->daemon), &n, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the oversubscribed flag */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
|
||||
&(nodes[i]->oversubscribed), &n, ORTE_BOOL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
@ -222,7 +237,7 @@ int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
opal_list_append(&(nodes[i]->node_procs), &srcproc->super);
|
||||
opal_list_append(&(nodes[i]->procs), &srcproc->super);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,903 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/orte_constants.h"
|
||||
#include "orte/orte_types.h"
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/mca/schema/schema.h"
|
||||
#include "orte/mca/gpr/gpr.h"
|
||||
#include "orte/mca/ns/ns.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ras/ras.h"
|
||||
#include "orte/mca/rmgr/rmgr.h"
|
||||
#include "orte/mca/smr/smr_types.h"
|
||||
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
|
||||
/**
|
||||
* orte_rmaps_base_node_t
|
||||
*/
|
||||
|
||||
static void orte_rmaps_base_node_construct(orte_rmaps_base_node_t* node)
|
||||
{
|
||||
node->node = NULL;
|
||||
OBJ_CONSTRUCT(&node->node_procs, opal_list_t);
|
||||
}
|
||||
|
||||
static void orte_rmaps_base_node_destruct(orte_rmaps_base_node_t* node)
|
||||
{
|
||||
opal_list_item_t* item;
|
||||
if(NULL != node->node) {
|
||||
OBJ_RELEASE(node->node);
|
||||
node->node = NULL;
|
||||
}
|
||||
while(NULL != (item = opal_list_remove_first(&node->node_procs))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&node->node_procs);
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
orte_rmaps_base_node_t,
|
||||
opal_list_item_t,
|
||||
orte_rmaps_base_node_construct,
|
||||
orte_rmaps_base_node_destruct);
|
||||
|
||||
/**
|
||||
* orte_rmaps_base_proc_t
|
||||
*/
|
||||
|
||||
static void orte_rmaps_base_proc_construct(orte_rmaps_base_proc_t* proc)
|
||||
{
|
||||
proc->app = NULL;
|
||||
proc->proc_node = NULL;
|
||||
proc->pid = 0;
|
||||
proc->local_pid = 0;
|
||||
}
|
||||
|
||||
static void orte_rmaps_base_proc_destruct(orte_rmaps_base_proc_t* proc)
|
||||
{
|
||||
if (NULL != proc->app) {
|
||||
free(proc->app);
|
||||
proc->app = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
orte_rmaps_base_proc_t,
|
||||
opal_list_item_t,
|
||||
orte_rmaps_base_proc_construct,
|
||||
orte_rmaps_base_proc_destruct);
|
||||
|
||||
|
||||
/**
|
||||
* orte_rmaps_base_map_t
|
||||
*/
|
||||
|
||||
static void orte_rmaps_base_map_construct(orte_rmaps_base_map_t* map)
|
||||
{
|
||||
map->app = NULL;
|
||||
map->procs = NULL;
|
||||
map->num_procs = 0;
|
||||
OBJ_CONSTRUCT(&map->nodes, opal_list_t);
|
||||
}
|
||||
|
||||
static void orte_rmaps_base_map_destruct(orte_rmaps_base_map_t* map)
|
||||
{
|
||||
orte_std_cntr_t i=0;
|
||||
opal_list_item_t* item;
|
||||
|
||||
for(i=0; i<map->num_procs; i++) {
|
||||
OBJ_RELEASE(map->procs[i]);
|
||||
}
|
||||
while(NULL != (item = opal_list_remove_first(&map->nodes)))
|
||||
OBJ_RELEASE(item);
|
||||
if(NULL != map->procs) {
|
||||
free(map->procs);
|
||||
map->procs = NULL;
|
||||
}
|
||||
if(NULL != map->app) {
|
||||
OBJ_RELEASE(map->app);
|
||||
map->app = NULL;
|
||||
}
|
||||
OBJ_DESTRUCT(&map->nodes);
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
orte_rmaps_base_map_t,
|
||||
opal_list_item_t,
|
||||
orte_rmaps_base_map_construct,
|
||||
orte_rmaps_base_map_destruct);
|
||||
|
||||
|
||||
/*
|
||||
* Compare two proc entries
|
||||
*/
|
||||
|
||||
static int orte_rmaps_value_compare(orte_gpr_value_t** val1, orte_gpr_value_t** val2)
|
||||
{
|
||||
orte_std_cntr_t i;
|
||||
orte_std_cntr_t app1 = 0;
|
||||
orte_std_cntr_t app2 = 0;
|
||||
orte_std_cntr_t rank1 = 0;
|
||||
orte_std_cntr_t rank2 = 0;
|
||||
orte_std_cntr_t *sptr;
|
||||
orte_gpr_value_t* value;
|
||||
int rc;
|
||||
|
||||
for(i=0, value=*val1; i<value->cnt; i++) {
|
||||
orte_gpr_keyval_t* keyval = value->keyvals[i];
|
||||
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
rank1 = *sptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
app1 = *sptr;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
for(i=0, value=*val2; i<value->cnt; i++) {
|
||||
orte_gpr_keyval_t* keyval = value->keyvals[i];
|
||||
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
rank2 = *sptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
app2 = *sptr;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if(app1 < app2)
|
||||
return -1;
|
||||
if(app1 > app2)
|
||||
return +1;
|
||||
if(rank1 < rank2)
|
||||
return -1;
|
||||
if(rank1 > rank2)
|
||||
return +1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Obtain the mapping for this job, and the list of nodes confined to that mapping.
|
||||
*
|
||||
* Use this instead of orte_ras_base_node_query when past the RMAPS framework
|
||||
* since components like the PLS are only concerned with those nodes that they've
* been mapped on, not all of the nodes allocated to their job. In the case
|
||||
* where we are allocated 10 nodes from the RAS, but only map to 2 of them
|
||||
* then we don't try to launch orteds on all 10 nodes, just the 2 mapped.
|
||||
*/
|
||||
int orte_rmaps_base_mapped_node_query(opal_list_t* mapping_list, opal_list_t* nodes_alloc, orte_jobid_t jobid)
|
||||
{
|
||||
opal_list_item_t *item_a, *item_m, *item_n;
|
||||
int num_mapping = 0;
|
||||
int rc = ORTE_SUCCESS;
|
||||
bool matched = false;
|
||||
|
||||
/* get the mapping for this job */
|
||||
rc = orte_rmaps_base_get_map(jobid, mapping_list);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
num_mapping = opal_list_get_size(mapping_list);
|
||||
|
||||
/* Create a list of nodes that are in the mapping */
|
||||
for( item_m = opal_list_get_first(mapping_list);
|
||||
item_m != opal_list_get_end(mapping_list);
|
||||
item_m = opal_list_get_next(item_m)) {
|
||||
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item_m;
|
||||
|
||||
/* Iterate over all the nodes mapped and check them against the
|
||||
* allocated node list */
|
||||
for( item_n = opal_list_get_first(&(map->nodes));
|
||||
item_n != opal_list_get_end(&(map->nodes));
|
||||
item_n = opal_list_get_next(item_n)) {
|
||||
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)item_n;
|
||||
matched = false;
|
||||
|
||||
/* If this node is in the list already, skip it */
|
||||
if(num_mapping > 1) {
|
||||
for( item_a = opal_list_get_first(nodes_alloc);
|
||||
item_a != opal_list_get_end(nodes_alloc);
|
||||
item_a = opal_list_get_next(item_a)) {
|
||||
orte_ras_node_t* ras_node = (orte_ras_node_t*)item_a;
|
||||
if( rmaps_node->node == ras_node) {
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(matched) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise
|
||||
* - Add it to the allocated list of nodes
|
||||
*/
|
||||
OBJ_RETAIN(rmaps_node->node);
|
||||
opal_list_append(nodes_alloc, &rmaps_node->node->super);
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Lookup node (if it exists) in the list. If it doesn't exist, create a new
|
||||
* node and append to the table.
|
||||
*/
|
||||
|
||||
static orte_rmaps_base_node_t*
|
||||
orte_rmaps_lookup_node(opal_list_t* rmaps_nodes, opal_list_t* ras_nodes, char* node_name, orte_rmaps_base_proc_t* proc)
|
||||
{
|
||||
opal_list_item_t* item;
|
||||
for(item = opal_list_get_first(rmaps_nodes);
|
||||
item != opal_list_get_end(rmaps_nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_rmaps_base_node_t* node = (orte_rmaps_base_node_t*)item;
|
||||
if(strcmp(node->node->node_name, node_name) == 0) {
|
||||
OBJ_RETAIN(proc);
|
||||
opal_list_append(&node->node_procs, &proc->super);
|
||||
return node;
|
||||
}
|
||||
}
|
||||
for(item = opal_list_get_first(ras_nodes);
|
||||
item != opal_list_get_end(ras_nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_ras_node_t* ras_node = (orte_ras_node_t*)item;
|
||||
if(strcmp(ras_node->node_name, node_name) == 0) {
|
||||
orte_rmaps_base_node_t* node = OBJ_NEW(orte_rmaps_base_node_t);
|
||||
OBJ_RETAIN(ras_node);
|
||||
node->node = ras_node;
|
||||
OBJ_RETAIN(proc);
|
||||
opal_list_append(&node->node_procs, &proc->super);
|
||||
opal_list_prepend(rmaps_nodes, &node->super);
|
||||
return node;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Query the process mapping from the registry.
|
||||
*/
|
||||
|
||||
int orte_rmaps_base_get_map(orte_jobid_t jobid, opal_list_t* mapping_list)
|
||||
{
|
||||
orte_app_context_t** app_context = NULL;
|
||||
orte_rmaps_base_map_t** mapping = NULL;
|
||||
opal_list_t nodes;
|
||||
opal_list_item_t* item;
|
||||
orte_std_cntr_t i, num_context = 0;
|
||||
orte_std_cntr_t *sptr;
|
||||
orte_process_name_t *pptr;
|
||||
pid_t *pidptr;
|
||||
char* segment = NULL;
|
||||
orte_gpr_value_t** values;
|
||||
orte_std_cntr_t v, num_values;
|
||||
int rc;
|
||||
char* keys[] = {
|
||||
ORTE_PROC_RANK_KEY,
|
||||
ORTE_PROC_NAME_KEY,
|
||||
ORTE_PROC_APP_CONTEXT_KEY,
|
||||
ORTE_PROC_PID_KEY,
|
||||
ORTE_PROC_LOCAL_PID_KEY,
|
||||
ORTE_NODE_NAME_KEY,
|
||||
NULL
|
||||
};
|
||||
|
||||
/* query the application context */
|
||||
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &app_context, &num_context))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* query the node list */
|
||||
OBJ_CONSTRUCT(&nodes, opal_list_t);
|
||||
if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(&nodes,jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* build the mapping */
|
||||
if(NULL == (mapping = (orte_rmaps_base_map_t**)malloc(sizeof(orte_rmaps_base_map_t*) * num_context))) {
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
for(i=0; i<num_context; i++) {
|
||||
orte_rmaps_base_map_t* map = OBJ_NEW(orte_rmaps_base_map_t);
|
||||
orte_app_context_t* app = app_context[i];
|
||||
map->app = app;
|
||||
if (0 < app->num_procs) {
|
||||
map->procs = (orte_rmaps_base_proc_t**)malloc(sizeof(orte_rmaps_base_proc_t*) * app->num_procs);
|
||||
if(NULL == map->procs) {
|
||||
OBJ_RELEASE(map);
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
} else {
|
||||
if (1 < num_context) { /** can't have multiple contexts if zero num_procs */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_INVALID_NUM_PROCS);
|
||||
rc = ORTE_ERR_INVALID_NUM_PROCS;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
map->num_procs = 0;
|
||||
mapping[i] = map;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* query the process list from the registry */
|
||||
rc = orte_gpr.get(
|
||||
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
|
||||
segment,
|
||||
NULL,
|
||||
keys,
|
||||
&num_values,
|
||||
&values);
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* sort the response */
|
||||
qsort(values, num_values, sizeof(orte_gpr_value_t*),
|
||||
(int (*)(const void*,const void*))orte_rmaps_value_compare);
|
||||
|
||||
/* build the proc list */
|
||||
for(v=0; v<num_values; v++) {
|
||||
orte_gpr_value_t* value = values[v];
|
||||
orte_rmaps_base_map_t* map = NULL;
|
||||
orte_rmaps_base_proc_t* proc;
|
||||
char* node_name = NULL;
|
||||
orte_std_cntr_t kv, app_index;
|
||||
|
||||
proc = OBJ_NEW(orte_rmaps_base_proc_t);
|
||||
if(NULL == proc) {
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
for(kv = 0; kv<value->cnt; kv++) {
|
||||
orte_gpr_keyval_t* keyval = value->keyvals[kv];
|
||||
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
proc->proc_rank = *sptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
proc->proc_name = *pptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
app_index = *sptr;
|
||||
if(app_index >= num_context) {
|
||||
rc = ORTE_ERR_BAD_PARAM;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
map = mapping[app_index];
|
||||
proc->app = strdup(app_context[app_index]->app);
|
||||
continue;
|
||||
}
|
||||
if (strcmp(keyval->key, ORTE_PROC_PID_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
proc->pid = *pidptr;
|
||||
continue;
|
||||
}
|
||||
if (strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
proc->local_pid = *pidptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
|
||||
/* use the dss.copy function here to protect us against zero-length strings */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/* global record */
|
||||
if(NULL == map) {
|
||||
OBJ_RELEASE(proc);
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* This seems like a dummy check, but it ensures that we fail
|
||||
* rather than overrun our array. This can happen if the
|
||||
* indicies on the app schemas are incorrect
|
||||
*/
|
||||
if(map->num_procs < map->app->num_procs) {
|
||||
map->procs[map->num_procs++] = proc;
|
||||
proc->proc_node = orte_rmaps_lookup_node(&map->nodes, &nodes, node_name, proc);
|
||||
}
|
||||
else {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup any nodes allocated and not mapped */
|
||||
while(NULL != (item = opal_list_remove_first(&nodes))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
|
||||
/* release temporary variables */
|
||||
for(i=0; i<num_context; i++) {
|
||||
opal_list_append(mapping_list, &mapping[i]->super);
|
||||
}
|
||||
free(segment);
|
||||
free(app_context);
|
||||
free(mapping);
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
cleanup:
|
||||
if(NULL != segment)
|
||||
free(segment);
|
||||
if(NULL != app_context) {
|
||||
for(i=0; i<num_context; i++) {
|
||||
OBJ_RELEASE(app_context[i]);
|
||||
}
|
||||
free(app_context);
|
||||
}
|
||||
if(NULL != mapping) {
|
||||
for(i=0; i<num_context; i++) {
|
||||
if(NULL != mapping[i])
|
||||
OBJ_RELEASE(mapping[i]);
|
||||
}
|
||||
free(mapping);
|
||||
}
|
||||
|
||||
/* cleanup any nodes allocated and not mapped */
|
||||
while(NULL != (item = opal_list_remove_first(&nodes))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Query the process mapping for a specified node from the registry.
|
||||
*/
|
||||
|
||||
int orte_rmaps_base_get_node_map(
|
||||
orte_cellid_t cellid,
|
||||
orte_jobid_t jobid,
|
||||
const char* hostname,
|
||||
opal_list_t* mapping_list)
|
||||
{
|
||||
orte_app_context_t** app_context = NULL;
|
||||
orte_rmaps_base_map_t** mapping = NULL;
|
||||
orte_ras_node_t *ras_node = NULL;
|
||||
orte_gpr_keyval_t *condition;
|
||||
orte_std_cntr_t i, num_context = 0;
|
||||
orte_std_cntr_t *sptr;
|
||||
pid_t *pidptr;
|
||||
orte_process_name_t *pptr;
|
||||
char* segment = NULL;
|
||||
orte_gpr_value_t** values;
|
||||
orte_std_cntr_t v, num_values;
|
||||
int rc;
|
||||
char* keys[] = {
|
||||
ORTE_PROC_RANK_KEY,
|
||||
ORTE_PROC_NAME_KEY,
|
||||
ORTE_PROC_APP_CONTEXT_KEY,
|
||||
ORTE_PROC_PID_KEY,
|
||||
ORTE_PROC_LOCAL_PID_KEY,
|
||||
ORTE_NODE_NAME_KEY,
|
||||
NULL
|
||||
};
|
||||
|
||||
/* allocate the node */
|
||||
if(NULL == (ras_node = orte_ras.node_lookup(cellid,hostname))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* query the application context */
|
||||
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &app_context, &num_context))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if(NULL == (mapping = (orte_rmaps_base_map_t**)malloc(sizeof(orte_rmaps_base_map_t*) * num_context))) {
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
for(i=0; i<num_context; i++) {
|
||||
orte_rmaps_base_map_t* map = OBJ_NEW(orte_rmaps_base_map_t);
|
||||
orte_app_context_t* app = app_context[i];
|
||||
OBJ_RETAIN(app);
|
||||
map->app = app;
|
||||
map->procs = (orte_rmaps_base_proc_t**)malloc(sizeof(orte_rmaps_base_proc_t*) * app->num_procs);
|
||||
if(NULL == map->procs) {
|
||||
OBJ_RELEASE(map);
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
map->num_procs = 0;
|
||||
mapping[i] = map;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* setup condition/filter for query - return only processes that
|
||||
* are assigned to the specified node name
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&condition, ORTE_NODE_NAME_KEY, ORTE_STRING, (void*)hostname))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* query the process list from the registry */
|
||||
rc = orte_gpr.get_conditional(
|
||||
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
|
||||
segment,
|
||||
NULL,
|
||||
keys,
|
||||
1,
|
||||
&condition,
|
||||
&num_values,
|
||||
&values);
|
||||
|
||||
/* sort the response */
|
||||
qsort(values, num_values, sizeof(orte_gpr_value_t*),
|
||||
(int (*)(const void*,const void*))orte_rmaps_value_compare);
|
||||
|
||||
/* build the proc list */
|
||||
for(v=0; v<num_values; v++) {
|
||||
orte_gpr_value_t* value = values[v];
|
||||
orte_rmaps_base_map_t* map = NULL;
|
||||
orte_rmaps_base_node_t *node = NULL;
|
||||
orte_rmaps_base_proc_t* proc;
|
||||
char* node_name = NULL;
|
||||
orte_std_cntr_t kv, app_index;
|
||||
|
||||
proc = OBJ_NEW(orte_rmaps_base_proc_t);
|
||||
if(NULL == proc) {
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
for(kv = 0; kv<value->cnt; kv++) {
|
||||
orte_gpr_keyval_t* keyval = value->keyvals[kv];
|
||||
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
proc->proc_rank = *sptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
proc->proc_name = *pptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
app_index = *sptr;
|
||||
if(app_index >= num_context) {
|
||||
rc = ORTE_ERR_BAD_PARAM;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
map = mapping[app_index];
|
||||
if(opal_list_get_size(&map->nodes) == 0) {
|
||||
node = OBJ_NEW(orte_rmaps_base_node_t);
|
||||
if(NULL == node) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
goto cleanup;
|
||||
}
|
||||
OBJ_RETAIN(ras_node);
|
||||
node->node = ras_node;
|
||||
opal_list_append(&map->nodes, &node->super);
|
||||
} else {
|
||||
node = (orte_rmaps_base_node_t*)opal_list_get_first(&map->nodes);
|
||||
}
|
||||
proc->app = strdup(app_context[app_index]->app);
|
||||
continue;
|
||||
}
|
||||
if (strcmp(keyval->key, ORTE_PROC_PID_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
proc->pid = *pidptr;
|
||||
continue;
|
||||
}
|
||||
if (strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
proc->local_pid = *pidptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
|
||||
/* use the dss.copy function here to protect us against zero-length strings */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/* skip this entry? */
|
||||
if(NULL == map ||
|
||||
proc->proc_name.cellid != cellid) {
|
||||
OBJ_RELEASE(proc);
|
||||
continue;
|
||||
}
|
||||
map->procs[map->num_procs++] = proc;
|
||||
OBJ_RETAIN(proc);
|
||||
opal_list_append(&node->node_procs, &proc->super);
|
||||
proc->proc_node = node;
|
||||
}
|
||||
|
||||
/* return mapping for the entries that have procs on this node */
|
||||
for(i=0; i<num_context; i++) {
|
||||
orte_rmaps_base_map_t* map = mapping[i];
|
||||
if(map->num_procs) {
|
||||
opal_list_append(mapping_list, &map->super);
|
||||
} else {
|
||||
OBJ_RELEASE(map);
|
||||
}
|
||||
}
|
||||
|
||||
/* decrement reference count on node */
|
||||
OBJ_RELEASE(ras_node);
|
||||
|
||||
/* release all app context - note the reference count was bumped
|
||||
* if saved in the map
|
||||
*/
|
||||
for(i=0; i<num_context; i++) {
|
||||
OBJ_RELEASE(app_context[i]);
|
||||
}
|
||||
free(segment);
|
||||
free(app_context);
|
||||
free(mapping);
|
||||
OBJ_RELEASE(condition);
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
cleanup:
|
||||
if(NULL != segment)
|
||||
free(segment);
|
||||
if(NULL != app_context) {
|
||||
for(i=0; i<num_context; i++) {
|
||||
OBJ_RELEASE(app_context[i]);
|
||||
}
|
||||
free(app_context);
|
||||
}
|
||||
if(NULL != mapping) {
|
||||
for(i=0; i<num_context; i++) {
|
||||
if(NULL != mapping[i])
|
||||
OBJ_RELEASE(mapping[i]);
|
||||
}
|
||||
free(mapping);
|
||||
}
|
||||
if (NULL != condition)
|
||||
OBJ_RELEASE(condition);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the process mapping in the registry.
|
||||
*/
|
||||
|
||||
int orte_rmaps_base_set_map(orte_jobid_t jobid, opal_list_t* mapping_list)
|
||||
{
|
||||
orte_std_cntr_t i, j;
|
||||
orte_std_cntr_t index=0;
|
||||
orte_std_cntr_t num_procs = 0;
|
||||
int rc = ORTE_SUCCESS;
|
||||
opal_list_item_t* item;
|
||||
orte_gpr_value_t** values;
|
||||
char *segment;
|
||||
|
||||
for(item = opal_list_get_first(mapping_list);
|
||||
item != opal_list_get_end(mapping_list);
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item;
|
||||
num_procs += map->num_procs;
|
||||
}
|
||||
if(num_procs == 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/**
|
||||
* allocate value array. We need to reserve one extra spot so we can set the counter
|
||||
* for the process INIT state to indicate that all procs are at that state. This will
|
||||
* allow the INIT trigger to fire.
|
||||
*/
|
||||
values = (orte_gpr_value_t**)malloc((1+num_procs) * sizeof(orte_gpr_value_t*));
|
||||
if(NULL == values) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment,jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(values);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** setup the last value in the array to update the INIT counter */
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[num_procs]),
|
||||
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
|
||||
segment, 1, 1))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(values);
|
||||
free(segment);
|
||||
return rc;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[num_procs]->keyvals[0]), ORTE_PROC_NUM_AT_INIT, ORTE_STD_CNTR, &num_procs))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
values[num_procs]->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* counter is in the job's globals container */
|
||||
|
||||
|
||||
for(i=0; i<num_procs; i++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
|
||||
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
|
||||
segment, 7, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
for(j=0; j<i; j++) {
|
||||
OBJ_RELEASE(values[j]);
|
||||
}
|
||||
free(values);
|
||||
free(segment);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* iterate through all processes and initialize value array */
|
||||
for(item = opal_list_get_first(mapping_list);
|
||||
item != opal_list_get_end(mapping_list);
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item;
|
||||
orte_std_cntr_t p;
|
||||
for(p=0; p<map->num_procs; p++) {
|
||||
orte_rmaps_base_proc_t* proc = map->procs[p];
|
||||
orte_gpr_value_t* value = values[index++];
|
||||
orte_proc_state_t proc_state=ORTE_PROC_STATE_INIT;
|
||||
|
||||
/* initialize keyvals */
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_PROC_RANK_KEY, ORTE_STD_CNTR, &(proc->proc_rank)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_PROC_NAME_KEY, ORTE_NAME, &(proc->proc_name)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_NODE_NAME_KEY, ORTE_STRING, proc->proc_node->node->node_name))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &(map->app->idx)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[5]), ORTE_PROC_PID_KEY, ORTE_PID, &(proc->pid)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[6]), ORTE_PROC_LOCAL_PID_KEY, ORTE_PID, &(proc->local_pid)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* set the tokens */
|
||||
if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(value->tokens), &(value->num_tokens), &(proc->proc_name)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* insert all values in one call */
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.put((1+num_procs), values))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
for(i=0; i<num_procs; i++) {
|
||||
if(NULL != values[i]) {
|
||||
OBJ_RELEASE(values[i]);
|
||||
}
|
||||
}
|
||||
if(NULL != values)
|
||||
free(values);
|
||||
return rc;
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ static orte_rmaps_base_module_t *select_any(void);
|
||||
* Function for selecting one component from all those that are
|
||||
* available.
|
||||
*/
|
||||
int orte_rmaps_base_map(orte_jobid_t job, char *desired_mapper)
|
||||
int orte_rmaps_base_map_job(orte_jobid_t job, char *desired_mapper)
|
||||
{
|
||||
orte_rmaps_base_module_t *module=NULL;
|
||||
int rc;
|
||||
|
@ -1,35 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/orte_constants.h"
|
||||
#include "orte/orte_types.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/mca/ns/ns_types.h"
|
||||
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
|
||||
|
||||
int orte_rmaps_base_map_no_op(orte_jobid_t job, char *desired_mapper)
|
||||
{
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
@ -50,14 +50,17 @@ orte_rmaps_base_t orte_rmaps_base;
|
||||
* Declare the RMAPS module to hold the API function pointers
|
||||
*/
|
||||
orte_rmaps_base_module_t orte_rmaps = {
|
||||
orte_rmaps_base_map,
|
||||
orte_rmaps_base_map_job,
|
||||
orte_rmaps_base_get_job_map,
|
||||
orte_rmaps_base_get_node_map,
|
||||
orte_rmaps_base_finalize
|
||||
};
|
||||
|
||||
orte_rmaps_base_module_t orte_rmaps_no_op = {
|
||||
orte_rmaps_base_map_no_op,
|
||||
orte_rmaps_base_finalize
|
||||
};
|
||||
/*
|
||||
* Include all the RMAPS class instance declarations
|
||||
*/
|
||||
#include "orte/mca/rmaps/base/rmaps_class_instances.h"
|
||||
|
||||
|
||||
/**
|
||||
* Function for finding and opening either all MCA components, or the one
|
||||
@ -66,7 +69,7 @@ orte_rmaps_base_module_t orte_rmaps_no_op = {
|
||||
int orte_rmaps_base_open(void)
|
||||
{
|
||||
int param, rc, value;
|
||||
char *policy, *requested;
|
||||
char *policy;
|
||||
orte_data_type_t tmp;
|
||||
|
||||
/* Debugging / verbose output */
|
||||
@ -150,30 +153,7 @@ int orte_rmaps_base_open(void)
|
||||
}
|
||||
|
||||
|
||||
/* Some systems do not want any RMAPS support. In those cases,
|
||||
* memory consumption is also an issue. For those systems, we
|
||||
* avoid opening the RMAPS components by checking for a directive
|
||||
* to use the "null" component.
|
||||
*/
|
||||
param = mca_base_param_reg_string_name("rmaps", NULL, NULL,
|
||||
false, false, NULL, NULL);
|
||||
if (ORTE_ERROR == mca_base_param_lookup_string(param, &requested)) {
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
if (NULL != requested && 0 == strcmp(requested, "null")) {
|
||||
/* the user has specifically requested that we use the "null"
|
||||
* component. In this case, that means we do NOT open any
|
||||
* components, and we simply use the default module we have
|
||||
* already defined above
|
||||
*/
|
||||
orte_rmaps_base.no_op_selected = true;
|
||||
orte_rmaps = orte_rmaps_no_op; /* use the no_op module */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
orte_rmaps_base.no_op_selected = false;
|
||||
|
||||
/* Open up all the components that we can find */
|
||||
|
||||
if (ORTE_SUCCESS !=
|
||||
mca_base_components_open("rmaps", orte_rmaps_base.rmaps_output,
|
||||
mca_rmaps_base_static_components,
|
||||
|
orte/mca/rmaps/base/rmaps_base_registry_fns.c (new file, 413 lines)
@ -0,0 +1,413 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/orte_constants.h"
|
||||
#include "orte/orte_types.h"
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/trace.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/mca/schema/schema.h"
|
||||
#include "orte/mca/gpr/gpr.h"
|
||||
#include "orte/mca/ns/ns.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/rmgr/rmgr.h"
|
||||
#include "orte/mca/smr/smr_types.h"
|
||||
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
|
||||
/*
|
||||
* Query the process mapping from the registry.
|
||||
*/
|
||||
int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
|
||||
{
|
||||
orte_job_map_t *mapping;
|
||||
orte_mapped_proc_t *proc;
|
||||
orte_cellid_t *cellptr, cell;
|
||||
orte_std_cntr_t *sptr;
|
||||
bool *bptr, oversub;
|
||||
pid_t *pidptr;
|
||||
orte_process_name_t *pptr;
|
||||
char *segment;
|
||||
char *node_name;
|
||||
char *username;
|
||||
orte_gpr_value_t **values, *value;
|
||||
orte_gpr_keyval_t* keyval;
|
||||
orte_std_cntr_t v, kv, num_values;
|
||||
int rc;
|
||||
char* keys[] = {
|
||||
ORTE_PROC_RANK_KEY,
|
||||
ORTE_PROC_NAME_KEY,
|
||||
ORTE_PROC_APP_CONTEXT_KEY,
|
||||
ORTE_PROC_LOCAL_PID_KEY,
|
||||
ORTE_CELLID_KEY,
|
||||
ORTE_NODE_NAME_KEY,
|
||||
ORTE_NODE_USERNAME_KEY,
|
||||
ORTE_NODE_OVERSUBSCRIBED_KEY,
|
||||
NULL
|
||||
};
|
||||
|
||||
OPAL_TRACE(1);
|
||||
|
||||
/* define default answer */
|
||||
*map = NULL;
|
||||
|
||||
/* create the object */
|
||||
mapping = OBJ_NEW(orte_job_map_t);
|
||||
if (NULL == mapping) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* store the jobid */
|
||||
mapping->job = jobid;
|
||||
|
||||
/* get the job segment name */
|
||||
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(mapping);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* query the application context */
|
||||
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &(mapping->apps), &(mapping->num_apps)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* query the process list from the registry */
|
||||
rc = orte_gpr.get(
|
||||
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
|
||||
segment,
|
||||
NULL,
|
||||
keys,
|
||||
&num_values,
|
||||
&values);
|
||||
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(mapping);
|
||||
free(segment);
|
||||
return rc;
|
||||
}
|
||||
free(segment);
|
||||
|
||||
/* build the node and proc lists. each value corresponds
|
||||
* to a process in the map
|
||||
*/
|
||||
for(v=0; v<num_values; v++) {
|
||||
value = values[v];
|
||||
node_name = NULL;
|
||||
|
||||
proc = OBJ_NEW(orte_mapped_proc_t);
|
||||
if(NULL == proc) {
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
for(kv = 0; kv<value->cnt; kv++) {
|
||||
keyval = value->keyvals[kv];
|
||||
|
||||
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
proc->rank = *sptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
proc->name = *pptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
proc->app_idx = *sptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
proc->pid = *pidptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cellptr, keyval->value, ORTE_CELLID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
cell = *cellptr;
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
|
||||
/* use the dss.copy function here to protect us against zero-length strings */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_NODE_USERNAME_KEY) == 0) {
|
||||
/* use the dss.copy function here to protect us against zero-length strings */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&username, keyval->value->data, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if(strcmp(keyval->key, ORTE_NODE_OVERSUBSCRIBED_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, keyval->value, ORTE_BOOL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
oversub = *bptr;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/* store this process in the map */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(mapping, cell, node_name, username, oversub, proc))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
if (NULL != node_name) free(node_name);
|
||||
}
|
||||
|
||||
/* all done */
|
||||
*map = mapping;
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
cleanup:
|
||||
OBJ_RELEASE(mapping);
|
||||
|
||||
for (v=0; v < num_values; v++) {
|
||||
OBJ_RELEASE(values[v]);
|
||||
}
|
||||
if (NULL != values) free(values);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell,
                                 char *nodename, orte_jobid_t job)
{
    orte_job_map_t *map;
    opal_list_item_t *item;
    orte_mapped_node_t *nptr;
    int rc;

    /* set default answer */
    *node = NULL;

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_job_map(&map, job))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* scan the map for the indicated node */
    for (item = opal_list_get_first(&map->nodes);
         item != opal_list_get_end(&map->nodes);
         item = opal_list_get_next(item)) {
        nptr = (orte_mapped_node_t*)item;

        if (cell == nptr->cell && 0 == strcmp(nodename, nptr->nodename)) {
            *node = nptr;
            /* protect the node object from release when we get rid
             * of the map object
             */
            opal_list_remove_item(&map->nodes, item);
            OBJ_RELEASE(map);
            return ORTE_SUCCESS;
        }
    }

    /* if we get here, then the node wasn't found */
    OBJ_RELEASE(map);
    return ORTE_ERR_NOT_FOUND;
}
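As a rough usage sketch (not part of this changeset; the function name example_report_node is hypothetical), a caller could retrieve the map entry for one node and walk the procs mapped onto it roughly as follows:

#include "opal/util/output.h"
#include "orte/mca/rmaps/base/rmaps_private.h"

/* Hypothetical caller: report the procs mapped onto one node of a job. */
static int example_report_node(orte_cellid_t cell, char *nodename, orte_jobid_t job)
{
    orte_mapped_node_t *node;
    opal_list_item_t *item;
    int rc;

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_node_map(&node, cell, nodename, job))) {
        return rc;  /* e.g. ORTE_ERR_NOT_FOUND if no procs from this job are on the node */
    }

    for (item = opal_list_get_first(&node->procs);
         item != opal_list_get_end(&node->procs);
         item = opal_list_get_next(item)) {
        orte_mapped_proc_t *proc = (orte_mapped_proc_t*)item;
        opal_output(0, "rank %lu mapped to %s", (unsigned long)proc->rank, node->nodename);
    }

    /* the node was removed from the map before the map was released,
     * so the caller now owns this object
     */
    OBJ_RELEASE(node);
    return ORTE_SUCCESS;
}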
|
||||
|
||||
/**
|
||||
* Set the process mapping in the registry.
|
||||
*/
|
||||
|
||||
int orte_rmaps_base_put_job_map(orte_job_map_t *map)
|
||||
{
|
||||
orte_std_cntr_t i, j;
|
||||
orte_std_cntr_t index=0;
|
||||
orte_std_cntr_t num_procs = 0;
|
||||
int rc = ORTE_SUCCESS;
|
||||
opal_list_item_t *item, *item2;
|
||||
orte_gpr_value_t **values, *value;
|
||||
char *segment;
|
||||
orte_mapped_node_t *node;
|
||||
orte_mapped_proc_t *proc;
|
||||
orte_proc_state_t proc_state=ORTE_PROC_STATE_INIT;
|
||||
|
||||
OPAL_TRACE(2);
|
||||
|
||||
for(item = opal_list_get_first(&map->nodes);
|
||||
item != opal_list_get_end(&map->nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
node = (orte_mapped_node_t*)item;
|
||||
num_procs += opal_list_get_size(&node->procs);
|
||||
}
|
||||
if(num_procs == 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/**
|
||||
* allocate value array. We need to reserve one extra spot so we can set the counter
|
||||
* for the process INIT state to indicate that all procs are at that state. This will
|
||||
* allow the INIT trigger to fire.
|
||||
*/
|
||||
values = (orte_gpr_value_t**)malloc((1+num_procs) * sizeof(orte_gpr_value_t*));
|
||||
if(NULL == values) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, map->job))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(values);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** setup the last value in the array to update the INIT counter */
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[num_procs]),
|
||||
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
|
||||
segment, 1, 1))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(values);
|
||||
free(segment);
|
||||
return rc;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[num_procs]->keyvals[0]), ORTE_PROC_NUM_AT_INIT, ORTE_STD_CNTR, &num_procs))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
values[num_procs]->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* counter is in the job's globals container */
|
||||
|
||||
|
||||
for(i=0; i<num_procs; i++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
|
||||
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
|
||||
segment, 8, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
for(j=0; j<i; j++) {
|
||||
OBJ_RELEASE(values[j]);
|
||||
}
|
||||
free(values);
|
||||
free(segment);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* iterate through all processes and initialize value array */
|
||||
for(item = opal_list_get_first(&map->nodes);
|
||||
item != opal_list_get_end(&map->nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
node = (orte_mapped_node_t*)item;
|
||||
|
||||
for (item2 = opal_list_get_first(&node->procs);
|
||||
item2 != opal_list_get_end(&node->procs);
|
||||
item2 = opal_list_get_next(item2)) {
|
||||
proc = (orte_mapped_proc_t*)item2;
|
||||
|
||||
value = values[index++];
|
||||
|
||||
/* initialize keyvals */
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_PROC_RANK_KEY, ORTE_STD_CNTR, &(proc->rank)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_PROC_NAME_KEY, ORTE_NAME, &(proc->name)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_CELLID_KEY, ORTE_CELLID, &(node->cell)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), ORTE_NODE_NAME_KEY, ORTE_STRING, node->nodename))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), ORTE_NODE_USERNAME_KEY, ORTE_STRING, node->username))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[5]), ORTE_NODE_OVERSUBSCRIBED_KEY, ORTE_BOOL, &(node->oversubscribed)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[6]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &(proc->app_idx)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[7]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* set the tokens */
|
||||
if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(value->tokens), &(value->num_tokens), &(proc->name)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* insert all values in one call */
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.put((1+num_procs), values))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
for(i=0; i<num_procs; i++) {
|
||||
if(NULL != values[i]) {
|
||||
OBJ_RELEASE(values[i]);
|
||||
}
|
||||
}
|
||||
if(NULL != values)
|
||||
free(values);
|
||||
return rc;
|
||||
}
|
||||
|
@ -48,6 +48,7 @@ static bool are_all_mapped_valid(char **mapping,
|
||||
opal_list_t* nodes)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
orte_ras_node_t *node;
|
||||
int i;
|
||||
bool matched;
|
||||
|
||||
@ -57,7 +58,8 @@ static bool are_all_mapped_valid(char **mapping,
|
||||
for(item = opal_list_get_first(nodes);
|
||||
item != opal_list_get_end(nodes);
|
||||
item = opal_list_get_next(item) ) {
|
||||
if( 0 == strcmp( ((orte_ras_node_t*) item)->node_name, mapping[i]) ) {
|
||||
node = (orte_ras_node_t*) item;
|
||||
if( 0 == strcmp(node->node_name, mapping[i]) ) {
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
@ -94,7 +96,7 @@ static bool is_mapped(opal_list_item_t *item,
|
||||
/*
|
||||
* Query the registry for all nodes allocated to a specified job
|
||||
*/
|
||||
int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots)
|
||||
int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots)
|
||||
{
|
||||
opal_list_item_t *item, *next;
|
||||
orte_ras_node_t *node;
|
||||
@ -104,7 +106,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
|
||||
/** set default answer */
|
||||
*total_num_slots = 0;
|
||||
|
||||
if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(nodes, jobid))) {
|
||||
/* get the allocation for this job */
|
||||
if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(allocated_nodes, jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
@ -115,21 +118,21 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
|
||||
id = mca_base_param_find("rmaps", NULL, "base_schedule_local");
|
||||
mca_base_param_lookup_int(id, &nolocal);
|
||||
if (0 == nolocal) {
|
||||
for (item = opal_list_get_first(nodes);
|
||||
item != opal_list_get_end(nodes);
|
||||
for (item = opal_list_get_first(allocated_nodes);
|
||||
item != opal_list_get_end(allocated_nodes);
|
||||
item = opal_list_get_next(item) ) {
|
||||
if (0 == strcmp(((orte_ras_node_t *) item)->node_name,
|
||||
orte_system_info.nodename) ||
|
||||
opal_ifislocal(((orte_ras_node_t *) item)->node_name)) {
|
||||
opal_list_remove_item(nodes, item);
|
||||
node = (orte_ras_node_t*)item;
|
||||
if (0 == strcmp(node->node_name, orte_system_info.nodename) ||
|
||||
opal_ifislocal(node->node_name)) {
|
||||
opal_list_remove_item(allocated_nodes, item);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** remove all nodes that are already at max usage */
|
||||
item = opal_list_get_first(nodes);
|
||||
while (item != opal_list_get_end(nodes)) {
|
||||
item = opal_list_get_first(allocated_nodes);
|
||||
while (item != opal_list_get_end(allocated_nodes)) {
|
||||
|
||||
/** save the next pointer in case we remove this node */
|
||||
next = opal_list_get_next(item);
|
||||
@ -137,8 +140,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
|
||||
/** check to see if this node is fully used - remove if so */
|
||||
node = (orte_ras_node_t*)item;
|
||||
if (0 != node->node_slots_max && node->node_slots_inuse > node->node_slots_max) {
|
||||
opal_list_remove_item(nodes, item);
|
||||
} else { /** otherwise, add its slots to the total */
|
||||
opal_list_remove_item(allocated_nodes, item);
|
||||
} else { /** otherwise, add the slots for our job to the total */
|
||||
num_slots += node->node_slots;
|
||||
}
|
||||
|
||||
@ -146,8 +149,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
|
||||
item = next;
|
||||
}
|
||||
|
||||
/* Sanity check to make sure we have been allocated nodes */
|
||||
if (0 == opal_list_get_size(nodes)) {
|
||||
/* Sanity check to make sure we have resources available */
|
||||
if (0 == opal_list_get_size(allocated_nodes)) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_TEMP_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
}
|
||||
@ -245,67 +248,108 @@ int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Claim a slot for a specified job on a node
|
||||
*/
|
||||
int orte_rmaps_base_claim_slot(orte_rmaps_base_map_t *map,
|
||||
orte_ras_node_t *current_node,
|
||||
orte_jobid_t jobid, orte_vpid_t vpid,
|
||||
int proc_index,
|
||||
opal_list_t *nodes,
|
||||
opal_list_t *fully_used_nodes)
|
||||
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename,
|
||||
char *username, bool oversubscribed, orte_mapped_proc_t *proc)
|
||||
{
|
||||
orte_rmaps_base_proc_t *proc;
|
||||
orte_process_name_t *proc_name;
|
||||
orte_rmaps_base_node_t *rmaps_node;
|
||||
int rc;
|
||||
opal_list_item_t *item;
|
||||
orte_mapped_node_t *node;
|
||||
|
||||
/* create objects */
|
||||
rmaps_node = OBJ_NEW(orte_rmaps_base_node_t);
|
||||
if (NULL == rmaps_node) {
|
||||
for (item = opal_list_get_first(&map->nodes);
|
||||
item != opal_list_get_end(&map->nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
node = (orte_mapped_node_t*)item;
|
||||
|
||||
if (cell == node->cell && 0 == strcmp(nodename, node->nodename)) {
|
||||
/* node was found - add this proc to that list */
|
||||
opal_list_append(&node->procs, &proc->super);
|
||||
/* set the oversubscribed flag */
|
||||
node->oversubscribed = oversubscribed;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
/* node was NOT found - add this one to the list */
|
||||
node = OBJ_NEW(orte_mapped_node_t);
|
||||
if (NULL == node) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
OBJ_RETAIN(current_node);
|
||||
rmaps_node->node = current_node;
|
||||
proc = OBJ_NEW(orte_rmaps_base_proc_t);
|
||||
node->cell = cell;
|
||||
node->nodename = strdup(nodename);
|
||||
if (NULL != username) {
|
||||
node->username = strdup(username);
|
||||
}
|
||||
node->oversubscribed = oversubscribed;
|
||||
opal_list_append(&map->nodes, &node->super);
|
||||
|
||||
/* and add this proc to the new node's list of procs */
|
||||
opal_list_append(&node->procs, &proc->super);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
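The in-tree caller of this helper is orte_rmaps_base_claim_slot below; purely for illustration, and under the assumption that map, name, vpid, app_idx, rc, and the ras node fields are already in scope, the isolated call pattern looks like:

orte_mapped_proc_t *proc = OBJ_NEW(orte_mapped_proc_t);
if (NULL == proc) {
    ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
    return ORTE_ERR_OUT_OF_RESOURCE;
}
proc->name = *name;       /* process name created by the mapper */
proc->rank = vpid;
proc->app_idx = app_idx;

/* file the proc under its node; the helper creates the
 * orte_mapped_node_t entry if this is the first proc on that node
 */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(map, node->node_cellid,
                                                          node->node_name,
                                                          node->node_username,
                                                          false, proc))) {
    ORTE_ERROR_LOG(rc);
    OBJ_RELEASE(proc);
    return rc;
}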
|
||||
|
||||
|
||||
/*
|
||||
* Claim a slot for a specified job on a node
|
||||
*/
|
||||
int orte_rmaps_base_claim_slot(orte_job_map_t *map,
|
||||
orte_ras_node_t *current_node,
|
||||
orte_jobid_t jobid, orte_vpid_t vpid,
|
||||
orte_std_cntr_t app_idx,
|
||||
opal_list_t *nodes,
|
||||
opal_list_t *fully_used_nodes)
|
||||
{
|
||||
orte_process_name_t *name;
|
||||
orte_mapped_proc_t *proc;
|
||||
bool oversub;
|
||||
int rc;
|
||||
|
||||
/* create mapped_proc object */
|
||||
proc = OBJ_NEW(orte_mapped_proc_t);
|
||||
if (NULL == proc) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
OBJ_RELEASE(rmaps_node);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* create the process name as an offset from the vpid-start */
|
||||
rc = orte_ns.create_process_name(&proc_name, current_node->node_cellid,
|
||||
rc = orte_ns.create_process_name(&name, current_node->node_cellid,
|
||||
jobid, vpid);
|
||||
if (rc != ORTE_SUCCESS) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(proc);
|
||||
OBJ_RELEASE(rmaps_node);
|
||||
return rc;
|
||||
}
|
||||
proc->proc_node = rmaps_node;
|
||||
proc->proc_name = *proc_name;
|
||||
proc->proc_rank = vpid;
|
||||
orte_ns.free_name(&proc_name);
|
||||
OBJ_RETAIN(proc); /* bump reference count for the node */
|
||||
opal_list_append(&rmaps_node->node_procs, &proc->super);
|
||||
map->procs[proc_index] = proc;
|
||||
|
||||
/* Save this node on the map */
|
||||
opal_list_append(&map->nodes, &rmaps_node->super);
|
||||
proc->name = *name;
|
||||
proc->rank = vpid;
|
||||
proc->app_idx = app_idx;
|
||||
|
||||
/* Be sure to demarcate this slot as claimed for the node */
|
||||
current_node->node_slots_inuse++;
|
||||
|
||||
/* see if this node is oversubscribed now */
|
||||
if (current_node->node_slots_inuse >= current_node->node_slots) {
|
||||
oversub = true;
|
||||
} else {
|
||||
oversub = false;
|
||||
}
|
||||
|
||||
/* add the proc to the map */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(map, current_node->node_cellid,
|
||||
current_node->node_name,
|
||||
current_node->node_username,
|
||||
oversub, proc))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(proc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Remove this node if it has reached its max number of allocatable slots OR it has
|
||||
* reached the soft limit AND we are in a "no oversubscribe" state
|
||||
*/
|
||||
if ((0 != current_node->node_slots_max &&
|
||||
current_node->node_slots_inuse >= current_node->node_slots_max) ||
|
||||
(!orte_rmaps_base.oversubscribe &&
|
||||
current_node->node_slots_inuse >= current_node->node_slots)) {
|
||||
(!orte_rmaps_base.oversubscribe && oversub)) {
|
||||
opal_list_remove_item(nodes, (opal_list_item_t*)current_node);
|
||||
/* add it to the list of fully used nodes */
|
||||
opal_list_append(fully_used_nodes, ¤t_node->super);
|
orte/mca/rmaps/base/rmaps_class_instances.h (new file, 142 lines)
@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file:
|
||||
*/
|
||||
|
||||
#ifndef ORTE_MCA_RMAPS_CLASS_INST_H
|
||||
#define ORTE_MCA_RMAPS_CLASS_INST_H
|
||||
|
||||
/*
|
||||
* includes
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
#include "orte/orte_constants.h"
|
||||
|
||||
#include "orte/mca/ns/ns_types.h"
|
||||
#include "orte/mca/gpr/gpr_types.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/ras/ras_types.h"
|
||||
|
||||
#include "orte/mca/rmaps/rmaps.h"
|
||||
|
||||
/*
|
||||
* Functions for use solely within the RMAPS framework
|
||||
*/
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
/*
|
||||
* orte_mapped_proc_t
|
||||
*/
|
||||
static void orte_rmaps_mapped_proc_construct(orte_mapped_proc_t* proc)
|
||||
{
|
||||
proc->name.cellid = ORTE_CELLID_INVALID;
|
||||
proc->name.jobid = ORTE_JOBID_INVALID;
|
||||
proc->name.vpid = ORTE_VPID_INVALID;
|
||||
proc->rank = 0;
|
||||
proc->app_idx = 0;
|
||||
proc->pid = 0;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_mapped_proc_t,
|
||||
opal_list_item_t,
|
||||
orte_rmaps_mapped_proc_construct, NULL);
|
||||
|
||||
/*
|
||||
* orte_mapped_node_t
|
||||
*/
|
||||
static void orte_rmaps_mapped_node_construct(orte_mapped_node_t* node)
|
||||
{
|
||||
node->nodename = NULL;
|
||||
node->username = NULL;
|
||||
node->daemon = NULL;
|
||||
node->oversubscribed = false;
|
||||
OBJ_CONSTRUCT(&node->procs, opal_list_t);
|
||||
}
|
||||
|
||||
static void orte_rmaps_mapped_node_destruct(orte_mapped_node_t* node)
|
||||
{
|
||||
opal_list_item_t* item;
|
||||
|
||||
if (NULL != node->nodename) {
|
||||
free(node->nodename);
|
||||
}
|
||||
|
||||
if (NULL != node->username) {
|
||||
free(node->username);
|
||||
}
|
||||
|
||||
if (NULL != node->daemon) {
|
||||
free(node->daemon);
|
||||
}
|
||||
|
||||
while (NULL != (item = opal_list_remove_first(&node->procs))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&node->procs);
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_mapped_node_t,
|
||||
opal_list_item_t,
|
||||
orte_rmaps_mapped_node_construct,
|
||||
orte_rmaps_mapped_node_destruct);
|
||||
|
||||
/*
|
||||
* orte_job_map_t
|
||||
*/
|
||||
|
||||
static void orte_rmaps_job_map_construct(orte_job_map_t* map)
|
||||
{
|
||||
map->job = ORTE_JOBID_INVALID;
|
||||
map->num_apps = 0;
|
||||
map->apps = NULL;
|
||||
OBJ_CONSTRUCT(&map->nodes, opal_list_t);
|
||||
}
|
||||
|
||||
static void orte_rmaps_job_map_destruct(orte_job_map_t* map)
|
||||
{
|
||||
orte_std_cntr_t i=0;
|
||||
opal_list_item_t* item;
|
||||
|
||||
for(i=0; i < map->num_apps; i++) {
|
||||
if (NULL != map->apps[i]) OBJ_RELEASE(map->apps[i]);
|
||||
}
|
||||
if (NULL != map->apps) {
|
||||
free(map->apps);
|
||||
}
|
||||
|
||||
while (NULL != (item = opal_list_remove_first(&map->nodes))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&map->nodes);
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_job_map_t,
|
||||
opal_list_item_t,
|
||||
orte_rmaps_job_map_construct,
|
||||
orte_rmaps_job_map_destruct);
|
||||
|
||||
|
||||
/*
|
||||
* external API functions will be documented in the mca/rmaps/rmaps.h file
|
||||
*/
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif
|
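A short, hypothetical lifecycle sketch for these class instances (assuming the headers above are included): because the destructors chain, releasing the map releases its nodes, which in turn release their procs and free the strings they own.

orte_job_map_t *map = OBJ_NEW(orte_job_map_t);
orte_mapped_node_t *node = OBJ_NEW(orte_mapped_node_t);
orte_mapped_proc_t *proc = OBJ_NEW(orte_mapped_proc_t);

node->cell = 0;
node->nodename = strdup("n0");                /* freed by the node destructor */
opal_list_append(&node->procs, &proc->super); /* proc released by the node destructor */
opal_list_append(&map->nodes, &node->super);  /* node released by the map destructor */

OBJ_RELEASE(map);  /* one release tears down the whole map/node/proc tree */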
@ -30,6 +30,7 @@
|
||||
#include "orte/mca/ns/ns_types.h"
|
||||
#include "orte/mca/gpr/gpr_types.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/ras/ras_types.h"
|
||||
|
||||
#include "orte/mca/rmaps/rmaps.h"
|
||||
|
||||
@ -67,15 +68,47 @@ OBJ_CLASS_DECLARATION(orte_rmaps_base_cmp_t);
|
||||
|
||||
|
||||
/*
|
||||
* Base functions
|
||||
* Base API functions
|
||||
*/
|
||||
|
||||
int orte_rmaps_base_map(orte_jobid_t job, char *desired_mapper);
|
||||
|
||||
/*
|
||||
* NO_OP functions
|
||||
* Map a job
|
||||
* All calls to rmaps.map_job are routed through this function. This allows callers to
|
||||
* the RMAPS framework to specify the particular mapper they wish to use.
|
||||
*/
|
||||
int orte_rmaps_base_map_no_op(orte_jobid_t job, char *desired_mapper);
|
||||
int orte_rmaps_base_map_job(orte_jobid_t job, char *desired_mapper);
|
||||
|
||||
/*
|
||||
* Get job map
|
||||
* Retrieve the information for a job map from the registry and reassemble it into
|
||||
* an job_map object. Memory for the job_map object and all of its elements is
|
||||
* allocated by the function
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t job);
|
||||
|
||||
/*
|
||||
* Get node map
|
||||
* Retrieve the information for a job map from the registry and provide the info
|
||||
* for the specified node
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell,
|
||||
char *nodename, orte_jobid_t job);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Registry functions for maps
|
||||
*/
|
||||
/*
|
||||
* Put job map
|
||||
* Given a pointer to an orte_job_map_t, place the map's information on
|
||||
* the registry. Info is entered into the containers for each individual process on
|
||||
* the job's segment. Additionally, the function sets the INIT counter to the number
|
||||
* of processes in the map, thus causing the INIT trigger to fire so that any
|
||||
* attached subscriptions can be serviced.
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_rmaps_base_put_job_map(orte_job_map_t *map);
|
||||
|
||||
|
||||
/*
|
||||
* communication functions
|
||||
@ -89,55 +122,60 @@ void orte_rmaps_base_recv(int status, orte_process_name_t* sender,
|
||||
/*
|
||||
* Internal support functions
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_rmaps_base_mapped_node_query(opal_list_t* mapping_list, opal_list_t* nodes_alloc, orte_jobid_t jobid);
|
||||
ORTE_DECLSPEC int orte_rmaps_base_get_map(orte_jobid_t, opal_list_t* mapping);
|
||||
ORTE_DECLSPEC int orte_rmaps_base_set_map(orte_jobid_t, opal_list_t* mapping);
|
||||
ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_cellid_t, orte_jobid_t, const char*, opal_list_t* mapping);
|
||||
/*
|
||||
* Function to add a mapped_proc entry to a map
|
||||
* Scans list of nodes on map to see if the specified one already
|
||||
* exists - if so, just add this entry to that node's list of
|
||||
* procs. If not, then add new node entry and put this proc
|
||||
* on its list.
|
||||
*/
|
||||
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename,
|
||||
char *username, bool oversubscribed, orte_mapped_proc_t *proc);
|
||||
|
||||
ORTE_DECLSPEC int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots);
|
||||
ORTE_DECLSPEC int orte_rmaps_base_update_node_usage(opal_list_t *nodes);
|
||||
ORTE_DECLSPEC int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
|
||||
orte_app_context_t *app,
|
||||
opal_list_t *master_node_list,
|
||||
orte_std_cntr_t *total_num_slots);
|
||||
int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots);
|
||||
int orte_rmaps_base_update_node_usage(opal_list_t *nodes);
|
||||
int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
|
||||
orte_app_context_t *app,
|
||||
opal_list_t *master_node_list,
|
||||
orte_std_cntr_t *total_num_slots);
|
||||
|
||||
ORTE_DECLSPEC int orte_rmaps_base_claim_slot(orte_rmaps_base_map_t *map,
|
||||
orte_ras_node_t *current_node,
|
||||
orte_jobid_t jobid, orte_vpid_t vpid,
|
||||
int proc_index,
|
||||
opal_list_t *nodes,
|
||||
opal_list_t *fully_used_nodes);
|
||||
int orte_rmaps_base_claim_slot(orte_job_map_t *map,
|
||||
orte_ras_node_t *current_node,
|
||||
orte_jobid_t jobid, orte_vpid_t vpid,
|
||||
orte_std_cntr_t app_idx,
|
||||
opal_list_t *nodes,
|
||||
opal_list_t *fully_used_nodes);
|
||||
|
||||
/** Local data type functions */
|
||||
void orte_rmaps_base_std_obj_release(orte_data_value_t *value);
|
||||
|
||||
/* JOB_MAP */
|
||||
int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_compare_map(orte_rmaps_base_map_t *value1, orte_rmaps_base_map_t *value2, orte_data_type_t type);
|
||||
int orte_rmaps_base_copy_map(orte_job_map_t **dest, orte_job_map_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_compare_map(orte_job_map_t *value1, orte_job_map_t *value2, orte_data_type_t type);
|
||||
int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
|
||||
orte_std_cntr_t num_vals, orte_data_type_t type);
|
||||
int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_size_map(size_t *size, orte_rmaps_base_map_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_print_map(char **output, char *prefix, orte_job_map_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_size_map(size_t *size, orte_job_map_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest,
|
||||
orte_std_cntr_t *num_vals, orte_data_type_t type);
|
||||
|
||||
/* MAPPED_PROC */
|
||||
int orte_rmaps_base_copy_mapped_proc(orte_rmaps_base_proc_t **dest, orte_rmaps_base_proc_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_compare_mapped_proc(orte_rmaps_base_proc_t *value1, orte_rmaps_base_proc_t *value2, orte_data_type_t type);
|
||||
int orte_rmaps_base_copy_mapped_proc(orte_mapped_proc_t **dest, orte_mapped_proc_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_compare_mapped_proc(orte_mapped_proc_t *value1, orte_mapped_proc_t *value2, orte_data_type_t type);
|
||||
int orte_rmaps_base_pack_mapped_proc(orte_buffer_t *buffer, void *src,
|
||||
orte_std_cntr_t num_vals, orte_data_type_t type);
|
||||
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_base_proc_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_rmaps_base_proc_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_mapped_proc_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_mapped_proc_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
|
||||
orte_std_cntr_t *num_vals, orte_data_type_t type);
|
||||
|
||||
/* MAPPED_NODE */
|
||||
int orte_rmaps_base_copy_mapped_node(orte_rmaps_base_node_t **dest, orte_rmaps_base_node_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_compare_mapped_node(orte_rmaps_base_node_t *value1, orte_rmaps_base_node_t *value2, orte_data_type_t type);
|
||||
int orte_rmaps_base_copy_mapped_node(orte_mapped_node_t **dest, orte_mapped_node_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_compare_mapped_node(orte_mapped_node_t *value1, orte_mapped_node_t *value2, orte_data_type_t type);
|
||||
int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
|
||||
orte_std_cntr_t num_vals, orte_data_type_t type);
|
||||
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_base_node_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_size_mapped_node(size_t *size, orte_rmaps_base_node_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_node_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_size_mapped_node(size_t *size, orte_mapped_node_t *src, orte_data_type_t type);
|
||||
int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
|
||||
orte_std_cntr_t *num_vals, orte_data_type_t type);
|
||||
|
||||
|
@ -69,6 +69,8 @@ orte_rmaps_base_component_t mca_rmaps_proxy_component = {
|
||||
*/
|
||||
static orte_rmaps_base_module_t orte_rmaps_proxy = {
|
||||
orte_rmaps_proxy_map,
|
||||
orte_rmaps_base_get_job_map,
|
||||
orte_rmaps_base_get_node_map,
|
||||
orte_rmaps_proxy_finalize
|
||||
};
|
||||
|
||||
|
@ -58,6 +58,18 @@
 */
typedef int (*orte_rmaps_base_module_map_fn_t)(orte_jobid_t job, char *desired_mapper);

/**
 * Get the map of a job from the registry
 */
typedef int (*orte_rmaps_base_module_get_job_map_fn_t)(orte_job_map_t **map, orte_jobid_t job);

/**
 * Get the map for a job on a specific node from the registry. Providing a jobid of
 * ORTE_JOBID_WILDCARD will return the map of all processes on that node
 */
typedef int (*orte_rmaps_base_module_get_node_map_fn_t)(orte_mapped_node_t **node, orte_cellid_t cell,
                                                        char *nodename, orte_jobid_t job);

/**
 * Cleanup module resources.
 */
@ -67,10 +79,14 @@ typedef int (*orte_rmaps_base_module_finalize_fn_t)(void);
 * rmaps module version 1.3.0
 */
struct orte_rmaps_base_module_1_3_0_t {
    /** Maping function pointer */
    orte_rmaps_base_module_map_fn_t map_job;
    /** Mapping function pointer */
    orte_rmaps_base_module_map_fn_t map_job;
    /** Get job map pointer */
    orte_rmaps_base_module_get_job_map_fn_t get_job_map;
    /** Node map pointer */
    orte_rmaps_base_module_get_node_map_fn_t get_node_map;
    /** Finalization function pointer */
    orte_rmaps_base_module_finalize_fn_t finalize;
    orte_rmaps_base_module_finalize_fn_t finalize;
};
/** Convenience typedef */
typedef struct orte_rmaps_base_module_1_3_0_t orte_rmaps_base_module_1_3_0_t;
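A hedged sketch of how a caller might use the new per-node query through the public orte_rmaps interface (my_cell and my_nodename are placeholders, and releasing the returned node object is an assumption, not something this commit specifies):

    orte_mapped_node_t *node;
    opal_list_item_t *item;
    int rc;

    /* ORTE_JOBID_WILDCARD asks for every process mapped onto this node */
    if (ORTE_SUCCESS != (rc = orte_rmaps.get_node_map(&node, my_cell, my_nodename, ORTE_JOBID_WILDCARD))) {
        ORTE_ERROR_LOG(rc);
    } else {
        for (item = opal_list_get_first(&node->procs);
             item != opal_list_get_end(&node->procs);
             item = opal_list_get_next(item)) {
            orte_mapped_proc_t *proc = (orte_mapped_proc_t*)item;
            /* proc->name, proc->rank, proc->app_idx and proc->pid describe one mapped process */
        }
        OBJ_RELEASE(node);   /* assumed: the caller owns the returned object */
    }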
@ -24,11 +24,7 @@
#include "orte/orte_constants.h"

#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rml/rml_types.h"

#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rmgr/rmgr_types.h"

/*
 * General MAP types
@ -36,55 +32,51 @@
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif


/**** JOB_MAP OBJECTS ***/
/*
 * Mapped process info for job_map
 */
struct orte_mapped_proc_t {
    opal_list_item_t super;
    orte_process_name_t name;   /* process name */
    orte_std_cntr_t rank;       /* process rank */
    orte_std_cntr_t app_idx;    /* index of app_context for this process */
    pid_t pid;
};
typedef struct orte_mapped_proc_t orte_mapped_proc_t;
OBJ_CLASS_DECLARATION(orte_mapped_proc_t);

/*
 * Mapping of nodes to process ranks.
 */
struct orte_rmaps_base_node_t {
struct orte_mapped_node_t {
    opal_list_item_t super;
    orte_ras_node_t* node;
    opal_list_t node_procs;      /* list of rmaps_base_proc_t */
    orte_cellid_t cell;          /* cell where this node is located */
    char *nodename;              /* name of node */
    char *username;
    orte_process_name_t *daemon; /* name of the daemon on this node
                                  * NULL => daemon not assigned yet
                                  */
    bool oversubscribed;         /* whether or not the #procs > #processors */
    opal_list_t procs;           /* list of mapped_proc objects on this node */
};
typedef struct orte_rmaps_base_node_t orte_rmaps_base_node_t;
OBJ_CLASS_DECLARATION(orte_rmaps_base_node_t);
typedef struct orte_mapped_node_t orte_mapped_node_t;
OBJ_CLASS_DECLARATION(orte_mapped_node_t);

/*
 * Mapping of a process rank to a specific node.
 */
struct orte_rmaps_base_proc_t {
    opal_list_item_t super;
    char *app;                   /* name of executable */
    orte_rmaps_base_node_t* proc_node;
    orte_process_name_t proc_name;
    orte_std_cntr_t proc_rank;
    pid_t pid;                   /* PLS-assigned pid */
    pid_t local_pid;             /* pid found by local process */
};
typedef struct orte_rmaps_base_proc_t orte_rmaps_base_proc_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_proc_t);


/*
 * Structure that represents the mapping of an application to an
 * Structure that represents the mapping of a job to an
 * allocated set of resources.
 */
struct orte_rmaps_base_map_t {
    opal_list_item_t super;
    orte_app_context_t *app;
    orte_rmaps_base_proc_t** procs;
    orte_std_cntr_t num_procs;
    opal_list_t nodes;           /* list of rmaps_base_node_t */
struct orte_job_map_t {
    opal_object_t super;
    orte_jobid_t job;
    orte_std_cntr_t num_apps;    /* number of app_contexts */
    orte_app_context_t **apps;   /* the array of app_contexts for this job */
    opal_list_t nodes;           /* list of mapped_node_t */
};
typedef struct orte_rmaps_base_map_t orte_rmaps_base_map_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_map_t);
typedef struct orte_job_map_t orte_job_map_t;
OBJ_CLASS_DECLARATION(orte_job_map_t);

#if defined(c_plusplus) || defined(__cplusplus)
}
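To show how the new objects nest (one orte_job_map_t per job, holding a list of orte_mapped_node_t, each holding a list of orte_mapped_proc_t), a minimal sketch follows; it assumes jobid is in scope and that the OBJ_NEW constructors initialize the embedded lists:

    orte_job_map_t *map = OBJ_NEW(orte_job_map_t);
    orte_mapped_node_t *node = OBJ_NEW(orte_mapped_node_t);
    orte_mapped_proc_t *proc = OBJ_NEW(orte_mapped_proc_t);

    map->job = jobid;            /* one map object per job */
    proc->rank = 0;
    proc->app_idx = 0;           /* index into map->apps for this process */
    opal_list_append(&node->procs, &proc->super);   /* procs hang off their node */
    opal_list_append(&map->nodes, &node->super);    /* nodes hang off the job map */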
@ -31,6 +31,7 @@

#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
#include "opal/util/trace.h"
#include "opal/util/show_help.h"
#include "opal/util/argv.h"

@ -56,7 +57,7 @@ static opal_list_t fully_used_nodes;
 */
static int map_app_by_node(
    orte_app_context_t* app,
    orte_rmaps_base_map_t* map,
    orte_job_map_t* map,
    orte_jobid_t jobid,
    orte_vpid_t vpid_start,
    opal_list_t* nodes,
@ -66,8 +67,9 @@ static int map_app_by_node(
    orte_std_cntr_t num_alloc = 0;
    opal_list_item_t *next;
    orte_ras_node_t *node;


    OPAL_TRACE(2);

    /* This loop continues until all procs have been mapped or we run
       out of resources. We determine that we have "run out of
       resources" when all nodes have node_slots_max processes mapped to them,
@ -110,7 +112,7 @@ static int map_app_by_node(

        /* Allocate a slot on this node */
        node = (orte_ras_node_t*) cur_node_item;
        if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, num_alloc,
        if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, app->idx,
                                                             nodes, max_used_nodes))) {
            ORTE_ERROR_LOG(rc);
            return rc;
@ -121,8 +123,6 @@ static int map_app_by_node(
        cur_node_item = next;
    }

    map->num_procs = num_alloc;

    return ORTE_SUCCESS;
}

@ -133,7 +133,7 @@ static int map_app_by_node(
 */
static int map_app_by_slot(
    orte_app_context_t* app,
    orte_rmaps_base_map_t* map,
    orte_job_map_t* map,
    orte_jobid_t jobid,
    orte_vpid_t vpid_start,
    opal_list_t* nodes,
@ -145,7 +145,8 @@ static int map_app_by_slot(
    orte_ras_node_t *node;
    opal_list_item_t *next;


    OPAL_TRACE(2);

    /* This loop continues until all procs have been mapped or we run
       out of resources. We determine that we have "run out of
       resources" when either all nodes have node_slots_max processes mapped to them,
@ -195,7 +196,7 @@ static int map_app_by_slot(
        num_slots_to_take = (node->node_slots == 0) ? 1 : node->node_slots;

        for( i = 0; i < num_slots_to_take; ++i) {
            if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, num_alloc,
            if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, app->idx,
                                                                 nodes, max_used_nodes))) {
                /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
                 * really isn't an error - we just need to break from the loop
@ -223,8 +224,6 @@ static int map_app_by_slot(

    }

    map->num_procs = num_alloc;

    return ORTE_SUCCESS;
}

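The two placement loops above differ only in how many processes they take from a node before advancing to the next one. A small self-contained analogue of the by-slot policy described in the comments (plain C, no ORTE types; the slot counts are made up):

    #include <stdio.h>

    /* Toy model of map_app_by_slot: fill each node up to its slot count,
     * then move on, wrapping around until every rank has been placed. */
    int main(void)
    {
        const int slots[3] = {2, 4, 2};   /* per-node slot counts (analogous to node_slots) */
        const int nprocs = 8;             /* ranks to place (analogous to app->num_procs) */
        int placed = 0, node = 0;

        while (placed < nprocs) {
            /* same guard as the real code: a zero slot count still takes one proc */
            int take = (slots[node] == 0) ? 1 : slots[node];
            for (int i = 0; i < take && placed < nprocs; ++i, ++placed) {
                printf("rank %d -> node %d\n", placed, node);
            }
            node = (node + 1) % 3;        /* advance round-robin to the next node */
        }
        return 0;
    }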
@ -235,11 +234,10 @@ static int map_app_by_slot(

static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
{
    orte_app_context_t** context, *app;
    orte_rmaps_base_map_t* map;
    orte_std_cntr_t i, num_context;
    orte_app_context_t *app;
    orte_job_map_t* map;
    orte_std_cntr_t i;
    opal_list_t master_node_list, mapped_node_list, max_used_nodes, *working_node_list;
    opal_list_t mapping;
    opal_list_item_t *item, *item2;
    orte_ras_node_t *node, *node2;
    orte_vpid_t vpid_start, job_vpid_start=0;
@ -247,8 +245,20 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
    int rc;
    bool bynode = true, modify_app_context = false;

    OPAL_TRACE(1);

    /* create the map object */
    map = OBJ_NEW(orte_job_map_t);
    if (NULL == map) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* set the jobid */
    map->job = jobid;

    /* query for the application context and allocated nodes */
    if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &context, &num_context))) {
    if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &(map->apps), &(map->num_apps)))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
@ -271,11 +281,6 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
        return rc;
    }

    /* construct a mapping for the job - the list will hold mappings for each
     * application context
     */
    OBJ_CONSTRUCT(&mapping, opal_list_t);

    /** initialize the cur_node_item to point to the first node in the list */
    cur_node_item = opal_list_get_first(&master_node_list);

@ -298,30 +303,20 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
    /** construct a list to hold any nodes involved in a user-specified mapping */
    OBJ_CONSTRUCT(&mapped_node_list, opal_list_t);

    for(i=0; i<num_context; i++) {
        app = context[i];
    for(i=0; i < map->num_apps; i++) {
        app = map->apps[i];

        /** if the number of processes wasn't specified, then we know there can be only
         * one app_context allowed in the launch, and that we are to launch it across
         * all available slots. We'll double-check the single app_context rule first
         */
        if (0 == app->num_procs && 1 < num_context) {
        if (0 == app->num_procs && 1 < map->num_apps) {
            opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np",
                           true, num_context, NULL);
                           true, map->num_apps, NULL);
            ORTE_ERROR_LOG(ORTE_ERR_INVALID_NUM_PROCS);
            return ORTE_ERR_INVALID_NUM_PROCS;
        }

        /** create a map for this app_context */
        map = OBJ_NEW(orte_rmaps_base_map_t);
        if(NULL == map) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        /** add it to the list of mappings for the job */
        opal_list_append(&mapping, &map->super);

        if ( 0 < app->num_map ) {
            /** If the user has specified a mapping for this app_context, then we
             * create a working node list that contains only those nodes.
@ -355,15 +350,6 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
            }
        }


        map->app = app;
        map->procs = (orte_rmaps_base_proc_t**)malloc(sizeof(orte_rmaps_base_proc_t*) * app->num_procs);
        if(NULL == map->procs) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }

        /* allocate a vpid range for this app within the job */
        if(ORTE_SUCCESS != (rc = orte_ns.reserve_range(jobid, app->num_procs, &vpid_start))) {
            ORTE_ERROR_LOG(rc);
@ -467,7 +453,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
    }

    /* save mapping to the registry */
    if(ORTE_SUCCESS != (rc = orte_rmaps_base_set_map(jobid, &mapping))) {
    if(ORTE_SUCCESS != (rc = orte_rmaps_base_put_job_map(map))) {
        goto cleanup;
    }

@ -493,7 +479,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
       processes
     */
    if (modify_app_context) {
        if (ORTE_SUCCESS != (rc = orte_rmgr.store_app_context(jobid, context, 1))) {
        if (ORTE_SUCCESS != (rc = orte_rmgr.store_app_context(jobid, map->apps, 1))) {
            ORTE_ERROR_LOG(rc);
        }
    }
@ -505,11 +491,6 @@ cleanup:
    }
    OBJ_DESTRUCT(&master_node_list);

    while(NULL != (item = opal_list_remove_first(&mapping))) {
        OBJ_RELEASE(item);
    }

    OBJ_DESTRUCT(&mapping);
    OBJ_DESTRUCT(&max_used_nodes);
    OBJ_DESTRUCT(&fully_used_nodes);
    OBJ_DESTRUCT(&mapped_node_list);
@ -526,6 +507,8 @@ static int orte_rmaps_rr_finalize(void)

orte_rmaps_base_module_t orte_rmaps_round_robin_module = {
    orte_rmaps_rr_map,
    orte_rmaps_base_get_job_map,
    orte_rmaps_base_get_node_map,
    orte_rmaps_rr_finalize
};

@ -62,6 +62,7 @@
#define ORTE_NODE_ALLOC_KEY "orte-node-alloc"
#define ORTE_NODE_BOOTPROXY_KEY "orte-node-bootproxy"
#define ORTE_NODE_USERNAME_KEY "orte-node-username"
#define ORTE_NODE_OVERSUBSCRIBED_KEY "orte-node-oversubscribed"
#define ORTE_JOB_APP_CONTEXT_KEY "orte-job-app-context"
#define ORTE_JOB_SLOTS_KEY "orte-job-slots" /**< number of procs in job */
#define ORTE_JOB_VPID_START_KEY "orte-job-vpid-start"

@ -66,7 +66,7 @@ extern char **environ;
#include "opal/mca/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/runtime/runtime.h"
#include "totalview.h"

@ -333,8 +333,11 @@ void orte_totalview_init_before_spawn(void)
 */
void orte_totalview_init_after_spawn(orte_jobid_t jobid)
{
    opal_list_t list_of_resource_maps;
    opal_list_item_t *item;
    orte_job_map_t *map;
    opal_list_item_t *item, *item2;
    orte_mapped_node_t *node;
    orte_mapped_proc_t *proc;
    orte_app_context_t *appctx;
    orte_std_cntr_t i;
    int rc;

@ -364,23 +367,18 @@ void orte_totalview_init_after_spawn(orte_jobid_t jobid)

        MPIR_debug_state = 1;

        OBJ_CONSTRUCT(&list_of_resource_maps, opal_list_t);
        /* Get the resource map for this job */

        /* Get a list of the resource maps for this job */

        rc = orte_rmaps_base_get_map(jobid, &list_of_resource_maps);
        rc = orte_rmaps.get_job_map(&map, jobid);
        if (ORTE_SUCCESS != rc) {
            opal_output(0, "Error: Can't get list of resource maps\n");
            opal_output(0, "Error: Can't get resource map\n");
            ORTE_ERROR_LOG(rc);
        }

        /* find the total number of processes in the job */

        for (item = opal_list_get_first(&list_of_resource_maps);
             item != opal_list_get_end(&list_of_resource_maps);
             item = opal_list_get_next(item)) {
            orte_rmaps_base_map_t *map = (orte_rmaps_base_map_t*) item;
            MPIR_proctable_size += map->num_procs;
        for (i=0; i < map->num_apps; i++) {
            MPIR_proctable_size += map->apps[i]->num_procs;
        }

        /* allocate MPIR_proctable */
@ -389,27 +387,34 @@ void orte_totalview_init_after_spawn(orte_jobid_t jobid)
                                   MPIR_proctable_size);
        if (MPIR_proctable == NULL) {
            opal_output(0, "Error: Out of memory\n");
            OBJ_DESTRUCT(&list_of_resource_maps);
            OBJ_RELEASE(map);
        }

        /* initialize MPIR_proctable */

        for (item = opal_list_get_first(&list_of_resource_maps);
             item != opal_list_get_end(&list_of_resource_maps);
        i=0;
        for (item = opal_list_get_first(&map->nodes);
             item != opal_list_get_end(&map->nodes);
             item = opal_list_get_next(item)) {
            orte_rmaps_base_map_t *map = (orte_rmaps_base_map_t*) item;
            for (i = 0; i < map->num_procs; i++) {
                orte_rmaps_base_proc_t *proc = map->procs[i];
                MPIR_proctable[i].host_name = proc->proc_node->node->node_name;
            node = (orte_mapped_node_t*)item;

            for (item2 = opal_list_get_first(&node->procs);
                 item2 != opal_list_get_end(&node->procs);
                 item2 = opal_list_get_next(item2)) {
                proc = (orte_mapped_proc_t*)item2;
                appctx = map->apps[proc->app_idx];

                MPIR_proctable[i].host_name = strdup(node->nodename);
                MPIR_proctable[i].executable_name =
                    opal_os_path( false, map->app->cwd, proc->app, NULL );
                MPIR_proctable[i].pid = proc->local_pid;
                    opal_os_path( false, appctx->cwd, appctx->app, NULL );
                MPIR_proctable[i].pid = proc->pid;
                i++;
            }
        }

        OBJ_DESTRUCT(&list_of_resource_maps);

        OBJ_RELEASE(map);
    }

    if (orte_debug_flag) {
        dump();
    }