1
1

Bring the map fixes into the main trunk. This should fix several problems, including the multiple app_context issue.

I have tested on rsh, slurm, bproc, and tm. Bproc continues to have a problem (I will be asking for help there).

Gridengine compiles but I cannot test it (I believe it will likely run).

Poe and xgrid compile to the extent they can without the proper include files.

This commit was SVN r12059.
Этот коммит содержится в:
Ralph Castain 2006-10-07 15:45:24 +00:00
родитель 5dbe5c7442
Коммит ae79894bad
34 изменённых файлов: 2176 добавлений и 2727 удалений

Просмотреть файл

@ -259,7 +259,7 @@ int orte_errmgr_bproc_register_job(orte_jobid_t job)
} }
/* send the request */ /* send the request */
if (0 > orte_rml.send_buffer(orte_errmgr_proxy_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) { if (0 > orte_rml.send_buffer(orte_errmgr_bproc_globals.replica, cmd, ORTE_RML_TAG_ERRMGR, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(cmd); OBJ_RELEASE(cmd);
return ORTE_ERR_COMM_FAILURE; return ORTE_ERR_COMM_FAILURE;
@ -274,7 +274,7 @@ int orte_errmgr_bproc_register_job(orte_jobid_t job)
} }
/* enter a blocking receive until we hear back */ /* enter a blocking receive until we hear back */
if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_ERRMGR)) { if (0 > orte_rml.recv_buffer(orte_errmgr_bproc_globals.replica, answer, ORTE_RML_TAG_ERRMGR)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer); OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE; return ORTE_ERR_COMM_FAILURE;

Просмотреть файл

@ -65,9 +65,14 @@ int orte_ns_base_print_name(char **output, char *prefix, orte_process_name_t *na
/* set default result */ /* set default result */
*output = NULL; *output = NULL;
if (NULL == name) {
asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: NULL",
(NULL == prefix ? " " : prefix));
} else {
asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: [%lu,%lu,%lu]", asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: [%lu,%lu,%lu]",
(NULL == prefix ? " " : prefix), (unsigned long)name->cellid, (NULL == prefix ? " " : prefix), (unsigned long)name->cellid,
(unsigned long)name->jobid, (unsigned long)name->vpid); (unsigned long)name->jobid, (unsigned long)name->vpid);
}
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -137,15 +137,3 @@ int orte_odls_bproc_component_close(void)
OBJ_DESTRUCT(&mca_odls_bproc_component.children); OBJ_DESTRUCT(&mca_odls_bproc_component.children);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
int orte_odls_bproc_component_finalize(void)
{
opal_list_item_t *item;
/* cleanup state */
while (NULL != (item = opal_list_remove_first(&mca_odls_bproc_component.children))) {
OBJ_RELEASE(item);
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -53,7 +53,7 @@ OBJ_CLASS_INSTANCE(orte_pls_daemon_info_t, /* type name */
/* /*
* Store the active daemons for a job * Store the active daemons for a job
*/ */
int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job) int orte_pls_base_store_active_daemons(opal_list_t *daemons)
{ {
orte_pls_daemon_info_t *dmn; orte_pls_daemon_info_t *dmn;
opal_list_item_t *item; opal_list_item_t *item;
@ -64,6 +64,10 @@ int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
/* determine the number of daemons */ /* determine the number of daemons */
num_daemons = opal_list_get_size(daemons); num_daemons = opal_list_get_size(daemons);
if (0 == num_daemons) {
return ORTE_SUCCESS;
}
/* since each daemon gets recorded in a separate node's container, /* since each daemon gets recorded in a separate node's container,
* we need to allocate space for num_daemons value objects * we need to allocate space for num_daemons value objects
*/ */
@ -74,15 +78,6 @@ int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
} }
memset(values, 0, num_daemons*sizeof(orte_gpr_value_t*)); /* NULL the array */ memset(values, 0, num_daemons*sizeof(orte_gpr_value_t*)); /* NULL the array */
/* setup the key */
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, job))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
return rc;
}
asprintf(&key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string);
free(jobid_string);
/* loop through the values and the list and create all the value objects */ /* loop through the values and the list and create all the value objects */
item = opal_list_get_first(daemons); item = opal_list_get_first(daemons);
for (i=0; i < num_daemons; i++) { for (i=0; i < num_daemons; i++) {
@ -102,6 +97,15 @@ int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job)
goto CLEANUP; goto CLEANUP;
} }
/* setup the key */
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, dmn->active_job))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
return rc;
}
asprintf(&key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string);
free(jobid_string);
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[0]), key, ORTE_NAME, dmn->name))) { if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[0]), key, ORTE_NAME, dmn->name))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; goto CLEANUP;
@ -140,7 +144,10 @@ int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job)
NULL NULL
}; };
orte_cellid_t *cell; orte_cellid_t *cell;
char *nodename;
orte_process_name_t *name;
orte_pls_daemon_info_t *dmn; orte_pls_daemon_info_t *dmn;
bool found_name, found_node, found_cell;
int rc; int rc;
/* setup the key */ /* setup the key */
@ -164,27 +171,29 @@ int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job)
/* loop through the answers and construct the list */ /* loop through the answers and construct the list */
for (i=0; i < cnt; i++) { for (i=0; i < cnt; i++) {
/* each container should have only one set of values */ /* for systems such as bproc, the node segment holds containers
dmn = OBJ_NEW(orte_pls_daemon_info_t); * for nodes that we may not have launched upon. Each container
if (NULL == dmn) { * will send us back a value object, so we have to ensure here
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); * that we only create daemon objects on the list for those nodes
goto CLEANUP; * that DO provide a valid object
} */
found_name = found_node = found_cell = false;
for (j=0; j < values[i]->cnt; j++) { for (j=0; j < values[i]->cnt; j++) {
kv = values[i]->keyvals[j]; kv = values[i]->keyvals[j];
if (0 == strcmp(kv->key, keys[0])) { if (0 == strcmp(kv->key, keys[0])) {
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), kv->value->data, ORTE_NAME))) { if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&name, kv->value, ORTE_NAME))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; goto CLEANUP;
} }
found_name = true;
continue; continue;
} }
if (0 == strcmp(kv->key, ORTE_NODE_NAME_KEY)) { if (0 == strcmp(kv->key, ORTE_NODE_NAME_KEY)) {
/* use the dss.copy function here to protect us against zero-length strings */ if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&nodename, kv->value, ORTE_STRING))) {
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->nodename), kv->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; goto CLEANUP;
} }
found_node = true;
continue; continue;
} }
if (0 == strcmp(kv->key, ORTE_CELLID_KEY)) { if (0 == strcmp(kv->key, ORTE_CELLID_KEY)) {
@ -192,12 +201,32 @@ int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job)
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; goto CLEANUP;
} }
dmn->cell = *cell; found_cell = true;
continue; continue;
} }
} }
/* if we found everything, then this is a valid entry - create
* it and add it to the list
*/
if (found_name && found_node && found_cell) {
dmn = OBJ_NEW(orte_pls_daemon_info_t);
if (NULL == dmn) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(dmn);
goto CLEANUP;
}
dmn->cell = *cell;
if (NULL != nodename) {
dmn->nodename = strdup(nodename);
}
/* add this daemon to the list */ /* add this daemon to the list */
opal_list_append(daemons, &dmn->super); opal_list_append(daemons, &dmn->super);
}
OBJ_RELEASE(values[i]); OBJ_RELEASE(values[i]);
} }
@ -212,5 +241,23 @@ CLEANUP:
} }
/* /*
* Retrieve the active daemon(s) for a specific node * Remove a daemon from the world of active daemons
*/ */
int orte_pls_base_remove_daemon(orte_pls_daemon_info_t *info)
{
opal_list_t daemons;
int rc;
OBJ_CONSTRUCT(&daemons, opal_list_t);
/* We actually don't want to do this - instead, we need to do a registry
* delete function call targeting this entry
*/
if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, info->active_job))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* find this item in the list */
return ORTE_SUCCESS;
}

Просмотреть файл

@ -58,6 +58,7 @@ int orte_pls_base_orted_exit(opal_list_t *daemons)
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
dmn = (orte_pls_daemon_info_t*)item; dmn = (orte_pls_daemon_info_t*)item;
opal_output(0, "sending exit cmd to daemon [%ld,%ld,%ld]", ORTE_NAME_ARGS(dmn->name));
if (0 > orte_rml.send_buffer(dmn->name, &cmd, ORTE_RML_TAG_PLS_ORTED, 0)) { if (0 > orte_rml.send_buffer(dmn->name, &cmd, ORTE_RML_TAG_PLS_ORTED, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_DESTRUCT(&cmd); OBJ_DESTRUCT(&cmd);

Просмотреть файл

@ -80,7 +80,8 @@ extern "C" {
int orte_pls_base_orted_add_local_procs(opal_list_t *daemons, orte_gpr_notify_data_t *ndat); int orte_pls_base_orted_add_local_procs(opal_list_t *daemons, orte_gpr_notify_data_t *ndat);
int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job); int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job);
int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job); int orte_pls_base_store_active_daemons(opal_list_t *daemons);
int orte_pls_base_remove_daemon(orte_pls_daemon_info_t *info);
/* /*
* communications utilities * communications utilities

Просмотреть файл

@ -60,17 +60,14 @@
#include "orte/mca/ns/ns.h" #include "orte/mca/ns/ns.h"
#include "orte/mca/sds/base/base.h" #include "orte/mca/sds/base/base.h"
#include "orte/mca/oob/base/base.h" #include "orte/mca/oob/base/base.h"
#include "orte/mca/ras/base/base.h" #include "orte/mca/ras/ras.h"
#include "orte/mca/rmgr/rmgr.h" #include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rmaps/rmaps_types.h" #include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml.h"
#include "orte/mca/smr/smr.h" #include "orte/mca/smr/smr.h"
#include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_wait.h"
#include "orte/runtime/runtime.h" #include "orte/runtime/runtime.h"
/* remove this when moved to 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/pls/base/pls_private.h" #include "orte/mca/pls/base/pls_private.h"
#include "pls_bproc.h" #include "pls_bproc.h"
@ -104,7 +101,7 @@ orte_pls_base_module_t orte_pls_bproc_module = {
}; };
static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map, static int orte_pls_bproc_node_array(orte_job_map_t* map,
int ** node_array, int * node_array_len); int ** node_array, int * node_array_len);
static int orte_pls_bproc_node_list(int * node_array, int node_array_len, static int orte_pls_bproc_node_list(int * node_array, int node_array_len,
int ** node_list, int * num_nodes, int ** node_list, int * num_nodes,
@ -123,12 +120,12 @@ static int bproc_vexecmove(int nnodes, int *nodes, int *pids, const char *cmd,
#endif #endif
static void orte_pls_bproc_setup_env(char *** env); static void orte_pls_bproc_setup_env(char *** env);
static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp, static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
int ** node_arrays, int * node_array_lens, orte_job_map_t *map,
int num_contexts, int num_procs,
orte_vpid_t global_vpid_start, orte_vpid_t global_vpid_start,
orte_jobid_t jobid, int* num_daemons); orte_jobid_t jobid, int* num_daemons);
static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid, static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
orte_rmaps_base_map_t* map, int num_processes, orte_job_map_t* map, int num_processes,
int num_slots,
orte_vpid_t vpid_start, orte_vpid_t vpid_start,
orte_vpid_t global_vpid_start, orte_vpid_t global_vpid_start,
int app_context, int app_context,
@ -144,7 +141,7 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
* @retval >=0 the number of processes * @retval >=0 the number of processes
* @retval <0 orte err * @retval <0 orte err
*/ */
static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map, static int orte_pls_bproc_node_array(orte_job_map_t* map,
int ** node_array, int * node_array_len) { int ** node_array, int * node_array_len) {
opal_list_item_t* item; opal_list_item_t* item;
int num_procs = 0; int num_procs = 0;
@ -156,8 +153,8 @@ static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
for(item = opal_list_get_first(&map->nodes); for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes); item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
if(*node_array_len < atol(((orte_rmaps_base_node_t*)item)->node->node_name)) { if(*node_array_len < atol(((orte_mapped_node_t*)item)->nodename)) {
*node_array_len = atol(((orte_rmaps_base_node_t*)item)->node->node_name); *node_array_len = atol(((orte_mapped_node_t*)item)->nodename);
} }
} }
(*node_array_len)++; (*node_array_len)++;
@ -172,9 +169,9 @@ static int orte_pls_bproc_node_array(orte_rmaps_base_map_t* map,
for(item = opal_list_get_first(&map->nodes); for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes); item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
orte_rmaps_base_node_t* node = (orte_rmaps_base_node_t*)item; orte_mapped_node_t* node = (orte_mapped_node_t*)item;
num_on_node = opal_list_get_size(&node->node_procs); num_on_node = opal_list_get_size(&node->procs);
(*node_array)[atol(node->node->node_name)] += num_on_node; (*node_array)[atol(node->nodename)] += num_on_node;
num_procs += num_on_node; num_procs += num_on_node;
} }
return num_procs; return num_procs;
@ -493,14 +490,12 @@ static void orte_pls_bproc_setup_env(char *** env)
* @retval error * @retval error
*/ */
static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp, static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
int ** node_arrays, int * node_array_lens, orte_job_map_t *map,
int num_contexts, int num_procs,
orte_vpid_t global_vpid_start, orte_vpid_t global_vpid_start,
orte_jobid_t jobid, int *num_launched) { orte_jobid_t jobid, int *num_launched) {
int * daemon_list = NULL; int * daemon_list = NULL;
int num_nodes = 0;
int num_daemons = 0; int num_daemons = 0;
int rc, i, j; int rc, i;
int * pids = NULL; int * pids = NULL;
int argc; int argc;
char ** argv = NULL; char ** argv = NULL;
@ -524,26 +519,25 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
*/ */
OBJ_CONSTRUCT(&daemons, opal_list_t); OBJ_CONSTRUCT(&daemons, opal_list_t);
/* find the length of the longest node array */ /* get the number of nodes in this job and allocate an array for
for(i = 0; i < num_contexts; i++) { * their names so we can pass that to bproc - populate the list
if(node_array_lens[i] > num_nodes) { * with the node names
num_nodes = node_array_lens[i]; */
} num_daemons = opal_list_get_size(&map->nodes);
} if(NULL == (daemon_list = (int*)malloc(sizeof(int) * num_daemons))) {
if(NULL == (daemon_list = (int*)malloc(sizeof(int) * num_nodes))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup; goto cleanup;
} }
/* create a list of all the nodes that need daemons, which is all the nodes i = 0;
* that will have at least 1 process */ for (item = opal_list_get_first(&map->nodes);
for(i = 0; i < num_nodes; i++) { item != opal_list_get_end(&map->nodes);
for(j = 0; j < num_contexts; j++) { item = opal_list_get_next(item)) {
if(i < node_array_lens[j] && 0 < *(node_arrays[j] + i)) { orte_mapped_node_t *node = (orte_mapped_node_t*)item;
daemon_list[num_daemons++] = i;
break; daemon_list[i++] = atoi(node->nodename);
}
}
} }
/* allocate storage to save the daemon pids */
if(NULL == (pids = (int*)malloc(sizeof(int) * num_daemons))) { if(NULL == (pids = (int*)malloc(sizeof(int) * num_daemons))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup; goto cleanup;
@ -574,7 +568,7 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
/* set up the base environment so the daemons can get their names once launched */ /* set up the base environment so the daemons can get their names once launched */
rc = orte_ns_nds_bproc_put(cellid, daemon_jobid, daemon_vpid_start, rc = orte_ns_nds_bproc_put(cellid, daemon_jobid, daemon_vpid_start,
global_vpid_start, num_procs, envp); global_vpid_start, num_daemons, envp);
if(ORTE_SUCCESS != rc) { if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
@ -695,7 +689,7 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
} }
} }
/* store the daemon info */ /* store the daemon info */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) { if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
*num_launched = num_daemons; *num_launched = num_daemons;
@ -857,19 +851,20 @@ orte_pls_bproc_monitor_nodes(void)
* @retval error * @retval error
*/ */
static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid, static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
orte_rmaps_base_map_t* map, int num_processes, orte_job_map_t* map, int num_processes, int num_slots,
orte_vpid_t vpid_start, orte_vpid_t vpid_start,
orte_vpid_t global_vpid_start, orte_vpid_t global_vpid_start,
int app_context, int * node_array, int app_context, int * node_array,
int node_array_len) { int node_array_len) {
int * node_list = NULL; int * node_list = NULL;
int num_nodes, num_slots, cycle; int num_nodes, cycle;
int rc, i, j, stride; int rc, i, j, stride;
int * pids = NULL; int * pids = NULL;
char * var, * param; char * var, * param;
orte_process_name_t * proc_name; orte_process_name_t * proc_name;
struct bproc_io_t bproc_io[3]; struct bproc_io_t bproc_io[3];
orte_rmaps_base_node_t *node; char **env;
int dbg;
OPAL_TRACE(1); OPAL_TRACE(1);
@ -878,25 +873,16 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* point to the env array for this app_context */
env = map->apps[app_context]->env;
/* set up app context */ /* set up app context */
asprintf(&param, "%d", app_context); asprintf(&param, "%d", app_context);
var = mca_base_param_environ_variable("pls", "bproc", "app_context"); var = mca_base_param_environ_variable("pls", "bproc", "app_context");
opal_setenv(var, param, true, &map->app->env); opal_setenv(var, param, true, &env);
free(param); free(param);
free(var); free(var);
/* in order for bproc processes to properly compute their name,
* we have to provide them with info on the number of slots
* on each node (which is a constant in bproc). We will pass this
* in an appropriate parameter which we set for each app_context
*/
node = (orte_rmaps_base_node_t*)opal_list_get_first(&map->nodes);
if (NULL == node) {
ORTE_ERROR_LOG(ORTE_ERROR);
return ORTE_ERROR;
}
num_slots = node->node->node_slots;
/* set the vpid-to-vpid stride based on the mapping mode */ /* set the vpid-to-vpid stride based on the mapping mode */
if (mca_pls_bproc_component.bynode) { if (mca_pls_bproc_component.bynode) {
/* we are mapping by node, so we want to set the stride /* we are mapping by node, so we want to set the stride
@ -914,7 +900,7 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
/* and push that value into the process' environment */ /* and push that value into the process' environment */
asprintf(&param, "%ld", (long)stride); asprintf(&param, "%ld", (long)stride);
var = mca_base_param_environ_variable("pls", "bproc", "stride"); var = mca_base_param_environ_variable("pls", "bproc", "stride");
opal_setenv(var, param, true, &map->app->env); opal_setenv(var, param, true, &env);
free(param); free(param);
free(var); free(var);
@ -943,11 +929,14 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
fprintf(stderr, "launching app %s\n", map->apps[app_context]->app);
while(0 != num_nodes) { while(0 != num_nodes) {
fprintf(stderr, "\tlaunching cycle %d\n", i);
for (dbg=0; dbg<num_nodes; dbg++) fprintf(stderr, "\t\tlaunching on node %d\n", node_list[dbg]);
/* setup environment so the procs can figure out their names */ /* setup environment so the procs can figure out their names */
rc = orte_ns_nds_bproc_put(cellid, jobid, vpid_start, global_vpid_start, rc = orte_ns_nds_bproc_put(cellid, jobid, vpid_start, global_vpid_start,
num_processes, &map->app->env); num_processes, &env);
if(ORTE_SUCCESS != rc) { if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
@ -962,21 +951,22 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid,
opal_output(0, "pls_bproc: launching %d processes", num_nodes); opal_output(0, "pls_bproc: launching %d processes", num_nodes);
} }
rc = bproc_vexecmove_io(num_nodes, node_list, pids, bproc_io, 3, rc = bproc_vexecmove_io(num_nodes, node_list, pids, bproc_io, 3,
map->app->app, map->app->argv, map->app->env); map->apps[app_context]->app,
map->apps[app_context]->argv, env);
if(0 < mca_pls_bproc_component.debug) { if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "pls_bproc: %d processes launched. First pid: %d", opal_output(0, "pls_bproc: %d processes launched. First pid: %d",
rc, *pids); rc, *pids);
} }
if(rc != num_nodes) { if(rc != num_nodes) {
opal_show_help("help-pls-bproc.txt", "proc-launch-number", true, opal_show_help("help-pls-bproc.txt", "proc-launch-number", true,
num_nodes, rc, map->app->app); num_nodes, rc, map->apps[app_context]->app);
rc = ORTE_ERROR; rc = ORTE_ERROR;
goto cleanup; goto cleanup;
} }
for(j = 0; j < num_nodes; j++) { for(j = 0; j < num_nodes; j++) {
if(0 >= pids[j]) { if(0 >= pids[j]) {
opal_show_help("help-pls-bproc.txt", "proc-launch-bad-pid", true, opal_show_help("help-pls-bproc.txt", "proc-launch-bad-pid", true,
node_list[j], pids[j], errno, map->app->app); node_list[j], pids[j], errno, map->apps[app_context]->app);
rc = ORTE_ERROR; rc = ORTE_ERROR;
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
@ -1056,10 +1046,10 @@ cleanup:
* @retval error * @retval error
*/ */
int orte_pls_bproc_launch(orte_jobid_t jobid) { int orte_pls_bproc_launch(orte_jobid_t jobid) {
opal_list_item_t* item, *item2; opal_list_item_t* item;
opal_list_t mapping;
orte_cellid_t cellid; orte_cellid_t cellid;
orte_rmaps_base_map_t* map; orte_job_map_t* map;
orte_mapped_node_t *map_node;
orte_vpid_t vpid_launch; orte_vpid_t vpid_launch;
orte_vpid_t vpid_range; orte_vpid_t vpid_range;
orte_vpid_t vpid_start; orte_vpid_t vpid_start;
@ -1068,11 +1058,13 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
int ** node_array = NULL; int ** node_array = NULL;
int * node_array_len = NULL; int * node_array_len = NULL;
int num_processes = 0; int num_processes = 0;
int num_daemons = 0; int num_daemons;
int context = 0; int num_slots;
int j; int context;
int i, j;
orte_std_cntr_t idx; orte_std_cntr_t idx;
char cwd_save[OMPI_PATH_MAX + 1]; char cwd_save[OMPI_PATH_MAX + 1];
orte_ras_node_t *ras_node;
OPAL_TRACE(1); OPAL_TRACE(1);
@ -1089,12 +1081,12 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
} }
cwd_save[sizeof(cwd_save) - 1] = '\0'; cwd_save[sizeof(cwd_save) - 1] = '\0';
/* query for the application context and allocated nodes */ /* get the job map */
OBJ_CONSTRUCT(&mapping, opal_list_t); if(ORTE_SUCCESS != (rc = orte_rmaps.get_job_map(&map, jobid))) {
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_map(jobid, &mapping))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
if(ORTE_SUCCESS != (rc = orte_rmgr.get_vpid_range(jobid, &vpid_start, if(ORTE_SUCCESS != (rc = orte_rmgr.get_vpid_range(jobid, &vpid_start,
&vpid_range))) { &vpid_range))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -1104,30 +1096,18 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
/* get the cellid */ /* get the cellid */
cellid = orte_process_info.my_name->cellid; cellid = orte_process_info.my_name->cellid;
/* do a large lock so the processes will not decrement the process count /* check all of the app_contexts for sanity */
* until we are done launching */ for (i=0; i < map->num_apps; i++) {
for (item = opal_list_get_first(&mapping);
item != opal_list_get_end(&mapping);
item = opal_list_get_next(item)) {
orte_std_cntr_t i;
map = (orte_rmaps_base_map_t*) item;
orte_dss.dump(0, map, ORTE_JOB_MAP);
for (i = 0; i < map->num_procs; ++i) {
orte_app_context_t *context = map->app;
/* Check that the cwd is sane. We have to chdir there in /* Check that the cwd is sane. We have to chdir there in
to check the executable, because the executable could to check the executable, because the executable could
have been specified as a relative path to the wdir */ have been specified as a relative path to the wdir */
rc = orte_rmgr.check_context_cwd(context, true); rc = orte_rmgr.check_context_cwd(map->apps[i], true);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
goto cleanup; goto cleanup;
} }
/* Check that the app exists and is executable */ /* Check that the app exists and is executable */
rc = orte_rmgr.check_context_app(context); rc = orte_rmgr.check_context_app(map->apps[i]);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
goto cleanup; goto cleanup;
} }
@ -1138,7 +1118,23 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
goto cleanup; goto cleanup;
} }
} }
/* For Bproc, we need to know how many slots were allocated on each
* node so the spawned processes can computer their name. Only Bproc
* needs to do this, so we choose not to modify the mapped_node struct
* to hold this info - bproc can go get it.
*
* Since Bproc also requires that the slots allocated on each node
* be the same, we really only need to lookup a single node. So grab
* the data for the first node on the map
*/
map_node = (orte_mapped_node_t*)opal_list_get_first(&map->nodes);
if (NULL == (ras_node = orte_ras.node_lookup(map_node->cell, map_node->nodename))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
goto cleanup;
} }
num_slots = ras_node->node_slots;
OBJ_RELEASE(ras_node);
if(0 < mca_pls_bproc_component.debug) { if(0 < mca_pls_bproc_component.debug) {
opal_output(0, "pls_bproc: --- starting to launch procs ---"); opal_output(0, "pls_bproc: --- starting to launch procs ---");
@ -1146,44 +1142,34 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
/* create an array to hold the pointers to the node arrays for each app /* create an array to hold the pointers to the node arrays for each app
* context. Also, create an array to hold the lengths of the node arrays */ * context. Also, create an array to hold the lengths of the node arrays */
node_array = malloc(opal_list_get_size(&mapping) * sizeof(int *)); node_array = malloc(map->num_apps * sizeof(int *));
node_array_len = malloc(opal_list_get_size(&mapping) * sizeof(int *)); node_array_len = malloc(map->num_apps * sizeof(int *));
/* for each application context - create a node array and setup its env */ /* for each application context - create a node array and setup its env */
for(item = opal_list_get_first(&mapping); for(i=0; i < map->num_apps; i++) {
item != opal_list_get_end(&mapping); rc = orte_pls_bproc_node_array(map, &node_array[i],
item = opal_list_get_next(item)) { &node_array_len[i]);
map = (orte_rmaps_base_map_t*)item;
rc = orte_pls_bproc_node_array(map, &node_array[context],
&node_array_len[context]);
if(0 > rc) { if(0 > rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
orte_pls_bproc_setup_env(&map->app->env); orte_pls_bproc_setup_env(&map->apps[i]->env);
num_processes += rc; num_processes += rc;
context++;
} }
/* save the active node names */
idx = 0; idx = 0;
for (item = opal_list_get_first(&mapping); for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&mapping); item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
orte_rmaps_base_map_t *map = (orte_rmaps_base_map_t*) item; orte_mapped_node_t* node = (orte_mapped_node_t*) item;
for (item2 = opal_list_get_first(&map->nodes);
item2 != opal_list_get_end(&map->nodes);
item2 = opal_list_get_next(item2)) {
orte_ras_node_t* node = (orte_ras_node_t*) item2;
rc = orte_pointer_array_add(&idx, mca_pls_bproc_component.active_node_names, rc = orte_pointer_array_add(&idx, mca_pls_bproc_component.active_node_names,
strdup(node->node_name)); strdup(node->nodename));
}
} }
/* setup subscription for each node so we can detect /* setup subscription for each node so we can detect
when the node's state changes, usefull for aborting when when the node's state changes, useful for aborting when
a bproc node up and dies */ a bproc node up and dies */
rc = orte_pls_bproc_monitor_nodes(); rc = orte_pls_bproc_monitor_nodes();
@ -1193,9 +1179,11 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
goto cleanup; goto cleanup;
} }
/* launch the daemons on all the nodes which have processes assign to them */ /* launch the daemons on all the nodes which have processes assigned to them.
rc = orte_pls_bproc_launch_daemons(cellid, &map->app->env, node_array, * We need to send along an appropriate environment for the daemons. Since
node_array_len, context, num_processes, * there must be at least ONE app_context, we can just take that one
*/
rc = orte_pls_bproc_launch_daemons(cellid, &map->apps[0]->env, map,
vpid_start, jobid, &num_daemons); vpid_start, jobid, &num_daemons);
if(ORTE_SUCCESS != rc) { if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -1238,44 +1226,35 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) {
} }
} }
context = 0;
vpid_launch = vpid_start; vpid_launch = vpid_start;
opal_output(0, "launching apps");
/* for each application context launch the app */ /* for each application context launch the app */
for(item = opal_list_get_first(&mapping); for(context=0; context < map->num_apps; context++) {
item != opal_list_get_end(&mapping); rc = orte_rmgr.check_context_cwd(map->apps[context], true);
item = opal_list_get_next(item)) {
map = (orte_rmaps_base_map_t*)item;
rc = orte_rmgr.check_context_cwd(map->app, true);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
goto cleanup; goto cleanup;
} }
rc = orte_pls_bproc_launch_app(cellid, jobid, map, num_processes, rc = orte_pls_bproc_launch_app(cellid, jobid, map, num_processes, num_slots,
vpid_launch, vpid_start, map->app->idx, vpid_launch, vpid_start, context,
node_array[context], node_array_len[context]); node_array[context], node_array_len[context]);
if(ORTE_SUCCESS != rc) { if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
free(node_array[context]); free(node_array[context]);
context++;
vpid_launch = vpid_start + mca_pls_bproc_component.num_procs; vpid_launch = vpid_start + mca_pls_bproc_component.num_procs;
} }
mca_pls_bproc_component.done_launching = true; mca_pls_bproc_component.done_launching = true;
cleanup: cleanup:
chdir(cwd_save); chdir(cwd_save);
while(NULL != (item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(item);
}
if(NULL != node_array) { if(NULL != node_array) {
free(node_array); free(node_array);
} }
if(NULL != node_array_len) { if(NULL != node_array_len) {
free(node_array_len); free(node_array_len);
} }
OBJ_DESTRUCT(&mapping);
return rc; return rc;
} }

Просмотреть файл

@ -81,11 +81,9 @@
#include "orte/mca/gpr/gpr.h" #include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras_types.h" #include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/smr/smr.h" #include "orte/mca/smr/smr.h"
/* clean up for ORTE 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/pls/pls.h" #include "orte/mca/pls/pls.h"
#include "orte/mca/pls/base/pls_private.h" #include "orte/mca/pls/base/pls_private.h"
#include "orte/mca/pls/gridengine/pls_gridengine.h" #include "orte/mca/pls/gridengine/pls_gridengine.h"
@ -104,21 +102,10 @@ orte_pls_base_module_t orte_pls_gridengine_module = {
orte_pls_gridengine_finalize orte_pls_gridengine_finalize
}; };
/**
* struct used to have enough information to clean up the state of the
* universe if a daemon aborts
*/
struct gridengine_daemon_info_t {
opal_object_t super;
orte_process_name_t *name;
char *nodename;
};
typedef struct gridengine_daemon_info_t gridengine_daemon_info_t;
static OBJ_CLASS_INSTANCE(gridengine_daemon_info_t,
opal_object_t,
NULL, NULL);
static void set_handler_default(int sig); static void set_handler_default(int sig);
#if 0
static int update_slot_keyval(orte_ras_node_t* node, int* slot_cnt); static int update_slot_keyval(orte_ras_node_t* node, int* slot_cnt);
#endif
/** /**
* Fill the orted_path variable with the directory to the orted * Fill the orted_path variable with the directory to the orted
@ -146,7 +133,7 @@ static int orte_pls_gridengine_fill_orted_path(char** orted_path)
*/ */
static void orte_pls_gridengine_wait_daemon(pid_t pid, int status, void* cbdata) static void orte_pls_gridengine_wait_daemon(pid_t pid, int status, void* cbdata)
{ {
gridengine_daemon_info_t *info = (gridengine_daemon_info_t*) cbdata; orte_pls_daemon_info_t *info = (orte_pls_daemon_info_t*) cbdata;
int rc; int rc;
/* if qrsh exited abnormally, set the daemon's state to aborted /* if qrsh exited abnormally, set the daemon's state to aborted
@ -204,16 +191,16 @@ static void orte_pls_gridengine_wait_daemon(pid_t pid, int status, void* cbdata)
*/ */
int orte_pls_gridengine_launch_job(orte_jobid_t jobid) int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
{ {
opal_list_t mapping; orte_job_map_t *map;
opal_list_item_t* m_item, *n_item; opal_list_item_t *n_item;
orte_std_cntr_t num_nodes; orte_std_cntr_t num_nodes;
orte_vpid_t vpid; orte_vpid_t vpid;
int node_name_index1; int node_name_index1;
int node_name_index2; int node_name_index2;
int proc_name_index; int proc_name_index;
int orted_index; int orted_index;
int call_yield_index;
char *jobid_string; char *jobid_string;
char *prefix_dir;
char *uri, *param; char *uri, *param;
char **argv; char **argv;
int argc; int argc;
@ -229,26 +216,19 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
*/ */
OBJ_CONSTRUCT(&daemons, opal_list_t); OBJ_CONSTRUCT(&daemons, opal_list_t);
/* Query the list of nodes allocated and mapped to this job. /* Get the map for this job.
* We need the entire mapping for a couple of reasons: * We need the entire mapping for a couple of reasons:
* - need the prefix to start with. * - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes * - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS * All other mapping responsibilities fall to orted in the fork PLS
*/ */
OBJ_CONSTRUCT(&mapping, opal_list_t); rc = orte_rmaps.get_job_map(&map, jobid);
rc = orte_rmaps_base_get_map(jobid, &mapping);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
num_nodes = 0; num_nodes = (orte_std_cntr_t)opal_list_get_size(&map->nodes);
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
/* /*
* Allocate a range of vpids for the daemons. * Allocate a range of vpids for the daemons.
@ -353,10 +333,6 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
free(uri); free(uri);
free(param); free(param);
opal_argv_append(&argc, &argv, "--mpi-call-yield");
call_yield_index = argc;
opal_argv_append(&argc, &argv, "0");
if (mca_pls_gridengine_component.debug) { if (mca_pls_gridengine_component.debug) {
param = opal_argv_join(argv, ' '); param = opal_argv_join(argv, ' ');
if (NULL != param) { if (NULL != param) {
@ -368,44 +344,41 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
/* Figure out the basenames for the libdir and bindir. There is a /* Figure out the basenames for the libdir and bindir. There is a
lengthy comment about this in pls_rsh_module.c explaining all lengthy comment about this in pls_rsh_module.c explaining all
the rationale for how / why we're doing this. */ the rationale for how / why we're doing this.
*/
lib_base = opal_basename(OPAL_LIBDIR); lib_base = opal_basename(OPAL_LIBDIR);
bin_base = opal_basename(OPAL_BINDIR); bin_base = opal_basename(OPAL_BINDIR);
/* /* See the note about prefix_dir in the orte/mca/pls/slurm/pls_slurm.c
* Iterate through each of the contexts * module. Fo here, just note that we must have at least one app_context,
* and we take the prefix_dir from that first one.
*/ */
for(m_item = opal_list_get_first(&mapping); prefix_dir = map->apps[0]->prefix_dir;
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
char *prefix_dir = map->app->prefix_dir;
/* /*
* For each of the contexts - iterate through the nodes. * Iterate through the nodes.
*/ */
for(n_item = opal_list_get_first(&map->nodes); for(n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes); n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) { n_item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item; orte_mapped_node_t* rmaps_node = (orte_mapped_node_t*)n_item;
orte_ras_node_t* ras_node = rmaps_node->node;
orte_process_name_t* name; orte_process_name_t* name;
pid_t pid; pid_t pid;
char *exec_path, *orted_path; char *exec_path, *orted_path;
char **exec_argv; char **exec_argv;
#if 0
int remain_slot_cnt; int remain_slot_cnt;
/* already launched on this node */ /* RHC - I don't believe this code is really necessary any longer.
if(ras_node->node_launched++ != 0) { * The mapper correctly accounts for slots that have already been
if (mca_pls_gridengine_component.debug) { * used. Even if another job starts to run between the time the
opal_output(0, "pls:gridengine: already launched on this node, %s", * mapper maps this job and we get to this point, the new job
ras_node->node_name); * will have gone through the mapper and will not overuse the node.
} * As this code consumes considerable time, I have sliced it out
continue; * of the code for now.
} *
* query the registry for the remaining gridengine slot count on
/* query the registry for the remaining gridengine slot count on
* this node, and update the registry for the count for the * this node, and update the registry for the count for the
* current process launch */ * current process launch */
if (ORTE_SUCCESS != (rc = if (ORTE_SUCCESS != (rc =
@ -421,22 +394,23 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
true, ras_node->node_name, true); true, ras_node->node_name, true);
exit(-1); /* exit instead of return ORTE_ERR_OUT_OF_RESOURCE */ exit(-1); /* exit instead of return ORTE_ERR_OUT_OF_RESOURCE */
} }
#endif
/* setup node name */ /* setup node name */
free(argv[node_name_index1]); free(argv[node_name_index1]);
if (NULL != ras_node->node_username && if (NULL != rmaps_node->username &&
0 != strlen (ras_node->node_username)) { 0 != strlen (rmaps_node->username)) {
asprintf(&argv[node_name_index1], "%s@%s", asprintf(&argv[node_name_index1], "%s@%s",
ras_node->node_username, ras_node->node_name); rmaps_node->username, rmaps_node->nodename);
} else { } else {
argv[node_name_index1] = strdup(ras_node->node_name); argv[node_name_index1] = strdup(rmaps_node->nodename);
} }
free(argv[node_name_index2]); free(argv[node_name_index2]);
argv[node_name_index2] = strdup(ras_node->node_name); argv[node_name_index2] = strdup(rmaps_node->nodename);
/* initialize daemons process name */ /* initialize daemons process name */
rc = orte_ns.create_process_name(&name, ras_node->node_cellid, 0, vpid); rc = orte_ns.create_process_name(&name, rmaps_node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
@ -445,8 +419,8 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
/* new daemon - setup to record its info */ /* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t); dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid; dmn->active_job = jobid;
dmn->cell = ras_node->node_cellid; dmn->cell = rmaps_node->cell;
dmn->nodename = strdup(ras_node->node_name); dmn->nodename = strdup(rmaps_node->nodename);
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) { if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
@ -474,27 +448,7 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
if (mca_pls_gridengine_component.debug) { if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: launching on node %s", opal_output(0, "pls:gridengine: launching on node %s",
ras_node->node_name); rmaps_node->nodename);
}
/* set the progress engine schedule for this node.
* if node_slots is set to zero, then we default to
* NOT being oversubscribed
*/
if (ras_node->node_slots > 0 &&
(orte_std_cntr_t)opal_list_get_size(&rmaps_node->node_procs) > ras_node->node_slots) {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
ras_node->node_slots, opal_list_get_size(&rmaps_node->node_procs));
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("1");
} else {
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("0");
} }
/* setting exec_argv and exec_path for qrsh */ /* setting exec_argv and exec_path for qrsh */
@ -655,8 +609,6 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
opal_output(0, "pls:gridengine: execve failed with errno=%d\n", errno); opal_output(0, "pls:gridengine: execve failed with errno=%d\n", errno);
exit(-1); exit(-1);
} else { /* parent */ } else { /* parent */
gridengine_daemon_info_t *daemon_info;
if (mca_pls_gridengine_component.debug) { if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: parent"); opal_output(0, "pls:gridengine: parent");
} }
@ -664,36 +616,20 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
/* setup callback on sigchild - wait until setup above is complete /* setup callback on sigchild - wait until setup above is complete
* as the callback can occur in the call to orte_wait_cb * as the callback can occur in the call to orte_wait_cb
*/ */
daemon_info = OBJ_NEW(gridengine_daemon_info_t); orte_wait_cb(pid, orte_pls_gridengine_wait_daemon, dmn);
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(daemon_info->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
}
daemon_info->nodename= strdup(ras_node->node_name);
orte_wait_cb(pid, orte_pls_gridengine_wait_daemon, daemon_info);
vpid++; vpid++;
} }
free(name); free(name);
} }
}
/* all done, so store the daemon info on the registry */ /* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) { if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
cleanup: cleanup:
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&daemons);
if (NULL != lib_base) { if (NULL != lib_base) {
free(lib_base); free(lib_base);
} }
@ -707,6 +643,7 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
return rc; return rc;
} }
#if 0
/** /**
* Query the registry for the gridengine slot count, and update it * Query the registry for the gridengine slot count, and update it
*/ */
@ -808,6 +745,7 @@ static int update_slot_keyval(orte_ras_node_t* ras_node, int* slot_cnt)
return rc; return rc;
} }
#endif
/** /**
* Query the registry for all nodes participating in the job * Query the registry for all nodes participating in the job

Просмотреть файл

@ -38,6 +38,7 @@
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/gpr/gpr.h" #include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h" #include "orte/mca/ns/ns.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rmgr/rmgr.h" #include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml.h"
#include "orte/mca/smr/smr.h" #include "orte/mca/smr/smr.h"
@ -46,10 +47,7 @@
#include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_wait.h"
/* remove for ORTE 2.0 */ /* remove for ORTE 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/sds/base/base.h" #include "orte/mca/sds/base/base.h"
#include "orte/mca/rmgr/base/rmgr_private.h"
#include "orte/mca/pls/pls.h" #include "orte/mca/pls/pls.h"
#include "orte/mca/pls/poe/pls_poe.h" #include "orte/mca/pls/poe/pls_poe.h"
@ -338,33 +336,34 @@ poe_wait_job - call back when POE finish
*/ */
static void poe_wait_job(pid_t pid, int status, void* cbdata) static void poe_wait_job(pid_t pid, int status, void* cbdata)
{ {
opal_list_t map; orte_job_map_t *map;
opal_list_item_t* item; opal_list_item_t *item, *item2;
int rc; int rc;
/* query allocation for the job */ /* query allocation for the job */
OBJ_CONSTRUCT(&map, opal_list_t); rc = orte_rmaps.get_job_map(&map, mca_pls_poe_component.jobid);
rc = orte_rmaps_base_get_map(mca_pls_poe_component.jobid,&map);
if(ORTE_SUCCESS != rc) { if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
for(item = opal_list_get_first(&map); for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map); item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item; orte_mapped_node_t* node = (orte_mapped_node_t*) item;
orte_std_cntr_t i;
for(i = 0 ; i < map->num_procs ; ++i) { for (item2 = opal_list_get_first(&node->procs);
orte_session_dir_finalize(&(map->procs[i])->proc_name); item2 != opal_list_get_end(&node->procs);
rc = orte_smr.set_proc_state(&(map->procs[i]->proc_name), item2 = opal_list_get_next(item2)) {
orte_mapped_proc_t* proc = (orte_mapped_proc_t*)item2;
orte_session_dir_finalize(&(proc->name));
rc = orte_smr.set_proc_state(&(proc->name),
ORTE_PROC_STATE_ABORTED, status); ORTE_PROC_STATE_ABORTED, status);
}
if(ORTE_SUCCESS != rc) { if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
} }
OBJ_DESTRUCT(&map); }
} }
/** /**
@ -379,7 +378,7 @@ poe_create_cmd_file - create POE command file
static int poe_create_cmd_file( static int poe_create_cmd_file(
FILE *cfp, FILE *cfp,
orte_app_context_t* context, orte_app_context_t* context,
orte_rmaps_base_proc_t* proc, orte_mapped_proc_t* proc,
orte_vpid_t vpid_start, orte_vpid_t vpid_start,
orte_vpid_t vpid_range) orte_vpid_t vpid_range)
{ {
@ -428,7 +427,7 @@ static int poe_create_cmd_file(
free(uri); free(uri);
/* push name into environment */ /* push name into environment */
orte_ns_nds_env_put(&proc->proc_name, vpid_start, vpid_range, &environ_copy); orte_ns_nds_env_put(&proc->name, vpid_start, vpid_range, &environ_copy);
if (context->argv == NULL) { if (context->argv == NULL) {
context->argv = malloc(sizeof(char*)*2); context->argv = malloc(sizeof(char*)*2);
@ -461,8 +460,8 @@ poe_launch_interactive - launch an interactive job
*/ */
static inline int poe_launch_interactive_job(orte_jobid_t jobid) static inline int poe_launch_interactive_job(orte_jobid_t jobid)
{ {
opal_list_t map, nodes, mapping_list; orte_job_map_t *map;
opal_list_item_t* item; opal_list_item_t *item, *item2;
orte_vpid_t vpid_start, vpid_range; orte_vpid_t vpid_start, vpid_range;
orte_std_cntr_t num_nodes, num_procs; orte_std_cntr_t num_nodes, num_procs;
FILE *hfp, *cfp; FILE *hfp, *cfp;
@ -479,12 +478,11 @@ static inline int poe_launch_interactive_job(orte_jobid_t jobid)
mca_pls_poe_component.jobid = jobid; mca_pls_poe_component.jobid = jobid;
OBJ_CONSTRUCT(&nodes, opal_list_t); /* get the map for this job */
OBJ_CONSTRUCT(&mapping_list, opal_list_t); rc = orte_rmaps.get_job_map(&map, jobid);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; } if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
num_nodes = opal_list_get_size(&nodes); num_nodes = opal_list_get_size(&map->nodes);
if(!strncmp(mca_pls_poe_component.resource_allocation,"hostfile",8)) { if(!strncmp(mca_pls_poe_component.resource_allocation,"hostfile",8)) {
@ -494,35 +492,32 @@ static inline int poe_launch_interactive_job(orte_jobid_t jobid)
(NULL==(hfp=fopen(mca_pls_poe_component.hostfile,"w"))) ) { (NULL==(hfp=fopen(mca_pls_poe_component.hostfile,"w"))) ) {
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
for(item = opal_list_get_first(&nodes); for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&nodes); item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item; orte_mapped_node_t* node = (orte_mapped_node_t*)item;
fprintf(hfp,"%s\n",node->node_name); fprintf(hfp,"%s\n",node->nodename);
} }
fclose(hfp); fclose(hfp);
}
rc = orte_rmgr_base_get_job_slots(jobid, &num_procs);
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
OBJ_CONSTRUCT(&map, opal_list_t);
rc = orte_rmaps_base_get_map(jobid,&map);
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
rc = orte_rmgr.get_vpid_range(jobid, &vpid_start, &vpid_range); rc = orte_rmgr.get_vpid_range(jobid, &vpid_start, &vpid_range);
if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; } if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
/* Create a temporary POE command file */ /* Create a temporary POE command file */
for(item = opal_list_get_first(&map); num_procs = 0;
item != opal_list_get_end(&map); for(item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map2 = (orte_rmaps_base_map_t*)item; orte_mapped_node_t* node = (orte_mapped_node_t*)item;
orte_std_cntr_t i;
for(i=0; i<map2->num_procs; i++) { for (item2 = opal_list_get_first(&node->procs);
rc = poe_create_cmd_file(cfp, map2->app, map2->procs[i], vpid_start, vpid_range); item2 != opal_list_get_end(&node->procs);
item2 = opal_list_get_next(item2)) {
orte_mapped_proc_t* proc = (orte_mapped_proc_t*)item2;
rc = poe_create_cmd_file(cfp, map->apps[proc->app_idx], proc, vpid_start, vpid_range);
if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; } if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; }
num_procs++;
} }
} }
fclose(cfp); fclose(cfp);
@ -587,20 +582,6 @@ static inline int poe_launch_interactive_job(orte_jobid_t jobid)
cleanup: cleanup:
while(NULL != (item = opal_list_remove_first(&map))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&map);
while(NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
while(NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);
return rc; return rc;
} }

Просмотреть файл

@ -81,7 +81,7 @@
#include "orte/mca/gpr/gpr.h" #include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras_types.h" #include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h" #include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/smr/smr.h" #include "orte/mca/smr/smr.h"
#include "orte/mca/pls/pls.h" #include "orte/mca/pls/pls.h"
@ -109,17 +109,6 @@ orte_pls_base_module_t orte_pls_rsh_module = {
orte_pls_rsh_finalize orte_pls_rsh_finalize
}; };
/* struct used to have enough information to clean up the state of the
universe if a daemon aborts */
struct rsh_daemon_info_t {
opal_object_t super;
orte_ras_node_t* node;
orte_jobid_t jobid;
};
typedef struct rsh_daemon_info_t rsh_daemon_info_t;
static OBJ_CLASS_INSTANCE(rsh_daemon_info_t,
opal_object_t,
NULL, NULL);
static void set_handler_default(int sig); static void set_handler_default(int sig);
enum { enum {
@ -140,11 +129,15 @@ static const char * orte_pls_rsh_shell_name[] = {
"unknown" "unknown"
}; };
/* local global storage of the list of active daemons */
opal_list_t active_daemons;
/** /**
* Check the Shell variable on the specified node * Check the Shell variable on the specified node
*/ */
static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell) static int orte_pls_rsh_probe(orte_mapped_node_t * node, orte_pls_rsh_shell * shell)
{ {
char ** argv; char ** argv;
int argc, rc, nfds, i; int argc, rc, nfds, i;
@ -156,7 +149,7 @@ static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell
if (mca_pls_rsh_component.debug) { if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: going to check SHELL variable on node %s\n", opal_output(0, "pls:rsh: going to check SHELL variable on node %s\n",
node->node_name); node->nodename);
} }
*shell = ORTE_PLS_RSH_SHELL_UNKNOWN; *shell = ORTE_PLS_RSH_SHELL_UNKNOWN;
/* /*
@ -164,7 +157,7 @@ static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell
*/ */
argv = opal_argv_copy(mca_pls_rsh_component.agent_argv); argv = opal_argv_copy(mca_pls_rsh_component.agent_argv);
argc = mca_pls_rsh_component.agent_argc; argc = mca_pls_rsh_component.agent_argc;
opal_argv_append(&argc, &argv, node->node_name); opal_argv_append(&argc, &argv, node->nodename);
opal_argv_append(&argc, &argv, "echo $SHELL"); opal_argv_append(&argc, &argv, "echo $SHELL");
if (pipe(fd)) { if (pipe(fd)) {
opal_output(0, "pls:rsh: pipe failed with errno=%d\n", errno); opal_output(0, "pls:rsh: pipe failed with errno=%d\n", errno);
@ -251,7 +244,7 @@ static int orte_pls_rsh_probe(orte_ras_node_t * node, orte_pls_rsh_shell * shell
} }
if (mca_pls_rsh_component.debug) { if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: node:%s has SHELL:%s\n", opal_output(0, "pls:rsh: node:%s has SHELL:%s\n",
node->node_name, orte_pls_rsh_shell_name[*shell]); node->nodename, orte_pls_rsh_shell_name[*shell]);
} }
return rc; return rc;
} }
@ -283,9 +276,10 @@ static int orte_pls_rsh_fill_exec_path ( char ** exec_path)
static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata) static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
{ {
rsh_daemon_info_t *info = (rsh_daemon_info_t*) cbdata; orte_pls_daemon_info_t *info = (orte_pls_daemon_info_t*) cbdata;
opal_list_t map; orte_mapped_node_t *node;
opal_list_item_t* item; orte_mapped_proc_t *proc;
opal_list_item_t *item;
int rc; int rc;
/* if ssh exited abnormally, set the child processes to aborted /* if ssh exited abnormally, set the child processes to aborted
@ -298,11 +292,8 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
*/ */
if (! WIFEXITED(status) || ! WEXITSTATUS(status) == 0) { if (! WIFEXITED(status) || ! WEXITSTATUS(status) == 0) {
/* get the mapping for our node so we can cancel the right things */ /* get the mapping for our node so we can cancel the right things */
OBJ_CONSTRUCT(&map, opal_list_t); rc = orte_rmaps.get_node_map(&node, info->cell,
rc = orte_rmaps_base_get_node_map(orte_process_info.my_name->cellid, info->nodename, info->active_job);
info->jobid,
info->node->node_name,
&map);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
@ -310,33 +301,30 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
/* set state of all processes associated with the daemon as /* set state of all processes associated with the daemon as
terminated */ terminated */
for(item = opal_list_get_first(&map); for(item = opal_list_get_first(&node->procs);
item != opal_list_get_end(&map); item != opal_list_get_end(&node->procs);
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item; proc = (orte_mapped_proc_t*) item;
orte_std_cntr_t i;
for (i = 0 ; i < map->num_procs ; ++i) {
/* Clean up the session directory as if we were the /* Clean up the session directory as if we were the
process itself. This covers the case where the process itself. This covers the case where the
process died abnormally and didn't cleanup its own process died abnormally and didn't cleanup its own
session directory. */ session directory. */
orte_session_dir_finalize(&(map->procs[i])->proc_name); orte_session_dir_finalize(&(proc->name));
rc = orte_smr.set_proc_state(&(map->procs[i]->proc_name), rc = orte_smr.set_proc_state(&(proc->name),
ORTE_PROC_STATE_ABORTED, status); ORTE_PROC_STATE_ABORTED, status);
}
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
} }
OBJ_DESTRUCT(&map); OBJ_RELEASE(node);
cleanup: cleanup:
/* tell the user something went wrong */ /* tell the user something went wrong */
opal_output(0, "ERROR: A daemon on node %s failed to start as expected.", opal_output(0, "ERROR: A daemon on node %s failed to start as expected.",
info->node->node_name); info->nodename);
opal_output(0, "ERROR: There may be more information available from"); opal_output(0, "ERROR: There may be more information available from");
opal_output(0, "ERROR: the remote shell (see above)."); opal_output(0, "ERROR: the remote shell (see above).");
@ -361,6 +349,15 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
/* release any waiting threads */ /* release any waiting threads */
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock); OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
/* tell the system that this daemon is gone */
if (ORTE_SUCCESS != (rc = orte_pls_base_remove_daemon(info))) {
ORTE_ERROR_LOG(rc);
}
/* remove the daemon from our local list */
opal_list_remove_item(&active_daemons, &info->super);
OBJ_RELEASE(info);
if (mca_pls_rsh_component.num_children-- >= if (mca_pls_rsh_component.num_children-- >=
mca_pls_rsh_component.num_concurrent || mca_pls_rsh_component.num_concurrent ||
mca_pls_rsh_component.num_children == 0) { mca_pls_rsh_component.num_children == 0) {
@ -368,9 +365,6 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
} }
OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock); OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock);
/* cleanup */
OBJ_RELEASE(info->node);
OBJ_RELEASE(info);
} }
/** /**
@ -380,18 +374,19 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
int orte_pls_rsh_launch(orte_jobid_t jobid) int orte_pls_rsh_launch(orte_jobid_t jobid)
{ {
opal_list_t mapping; orte_job_map_t *map;
opal_list_item_t* m_item, *n_item; opal_list_item_t *n_item;
orte_mapped_node_t *rmaps_node;
orte_std_cntr_t num_nodes; orte_std_cntr_t num_nodes;
orte_vpid_t vpid; orte_vpid_t vpid;
int node_name_index1; int node_name_index1;
int node_name_index2; int node_name_index2;
int proc_name_index; int proc_name_index;
int local_exec_index, local_exec_index_end; int local_exec_index, local_exec_index_end;
int call_yield_index;
char *jobid_string; char *jobid_string;
char *uri, *param; char *uri, *param;
char **argv, **tmp; char **argv, **tmp;
char *prefix_dir;
int argc; int argc;
int rc; int rc;
sigset_t sigs; sigset_t sigs;
@ -399,33 +394,45 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
bool remote_bash = false, remote_csh = false; bool remote_bash = false, remote_csh = false;
bool local_bash = false, local_csh = false; bool local_bash = false, local_csh = false;
char *lib_base = NULL, *bin_base = NULL; char *lib_base = NULL, *bin_base = NULL;
opal_list_t daemons;
orte_pls_daemon_info_t *dmn; orte_pls_daemon_info_t *dmn;
/* setup a list that will contain the info for all the daemons /* setup a list that will contain the info for all the daemons
* so we can store it on the registry when done * so we can store it on the registry when done and use it
* locally to track their state
*/ */
OBJ_CONSTRUCT(&daemons, opal_list_t); OBJ_CONSTRUCT(&active_daemons, opal_list_t);
/* Query the list of nodes allocated and mapped to this job. /* Get the map for this job
* We need the entire mapping for a couple of reasons: * We need the entire mapping for a couple of reasons:
* - need the prefix to start with. * - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes * - need to know the nodes we are launching on
* All other mapping responsibilities fall to orted in the fork PLS * All other mapping responsibilities fall to orted in the fork PLS
*/ */
OBJ_CONSTRUCT(&mapping, opal_list_t); rc = orte_rmaps.get_job_map(&map, jobid);
rc = orte_rmaps_base_get_map(jobid, &mapping);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
goto cleanup; goto cleanup;
} }
num_nodes = 0; num_nodes = (orte_std_cntr_t)opal_list_get_size(&map->nodes);
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping); /*
m_item = opal_list_get_next(m_item)) { * After a discussion between Ralph & Jeff, we concluded that we
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item; * really are handling the prefix dir option incorrectly. It currently
num_nodes += opal_list_get_size(&map->nodes); * is associated with an app_context, yet it really refers to the
} * location where OpenRTE/Open MPI is installed on a NODE. Fixing
* this right now would involve significant change to orterun as well
* as elsewhere, so we will intentionally leave this incorrect at this
* point. The error, however, is identical to that seen in all prior
* releases of OpenRTE/Open MPI, so our behavior is no worse than before.
*
* A note to fix this, along with ideas on how to do so, has been filed
* on the project's Trac system under "feature enhancement".
*
* For now, default to the prefix_dir provided in the first app_context.
* Since there always MUST be at least one app_context, we are safe in
* doing this.
*/
prefix_dir = map->apps[0]->prefix_dir;
/* /*
* Allocate a range of vpids for the daemons. * Allocate a range of vpids for the daemons.
@ -475,12 +482,8 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
} }
} else { } else {
orte_pls_rsh_shell shell; orte_pls_rsh_shell shell;
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)opal_list_get_first(&mapping); rmaps_node = (orte_mapped_node_t*)opal_list_get_first(&map->nodes);
orte_rmaps_base_node_t* rmaps_node = rc = orte_pls_rsh_probe(rmaps_node, &shell);
(orte_rmaps_base_node_t*)opal_list_get_first(&map->nodes);
orte_ras_node_t* node = rmaps_node->node;
rc = orte_pls_rsh_probe(node, &shell);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -582,10 +585,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
free(uri); free(uri);
free(param); free(param);
opal_argv_append(&argc, &argv, "--mpi-call-yield");
call_yield_index = argc;
opal_argv_append(&argc, &argv, "0");
local_exec_index_end = argc; local_exec_index_end = argc;
if (!(remote_csh || remote_bash)) { if (!(remote_csh || remote_bash)) {
opal_argv_append(&argc, &argv, ")"); opal_argv_append(&argc, &argv, ")");
@ -633,60 +632,48 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
bin_base = opal_basename(OPAL_BINDIR); bin_base = opal_basename(OPAL_BINDIR);
/* /*
* Iterate through each of the contexts * Iterate through each of the nodes
*/
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
char * prefix_dir = map->app->prefix_dir;
/*
* For each of the contexts - iterate through the nodes.
*/ */
for(n_item = opal_list_get_first(&map->nodes); for(n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes); n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) { n_item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
orte_ras_node_t* ras_node = rmaps_node->node;
orte_process_name_t* name; orte_process_name_t* name;
pid_t pid; pid_t pid;
char *exec_path; char *exec_path;
char **exec_argv; char **exec_argv;
/* already launched on this node */ rmaps_node = (orte_mapped_node_t*)n_item;
if(ras_node->node_launched++ != 0)
continue;
/* new daemon - setup to record its info */ /* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t); dmn = OBJ_NEW(orte_pls_daemon_info_t);
opal_list_append(&daemons, &dmn->super); dmn->active_job = jobid;
opal_list_append(&active_daemons, &dmn->super);
/* setup node name */ /* setup node name */
free(argv[node_name_index1]); free(argv[node_name_index1]);
if (NULL != ras_node->node_username && if (NULL != rmaps_node->username &&
0 != strlen (ras_node->node_username)) { 0 != strlen (rmaps_node->username)) {
asprintf (&argv[node_name_index1], "%s@%s", asprintf (&argv[node_name_index1], "%s@%s",
ras_node->node_username, ras_node->node_name); rmaps_node->username, rmaps_node->nodename);
} else { } else {
argv[node_name_index1] = strdup(ras_node->node_name); argv[node_name_index1] = strdup(rmaps_node->nodename);
} }
free(argv[node_name_index2]); free(argv[node_name_index2]);
argv[node_name_index2] = strdup(ras_node->node_name); argv[node_name_index2] = strdup(rmaps_node->nodename);
/* save it in the daemon info */ /* save it in the daemon info */
dmn->nodename = strdup(ras_node->node_name); dmn->nodename = strdup(rmaps_node->nodename);
/* initialize daemons process name */ /* initialize daemons process name */
rc = orte_ns.create_process_name(&name, ras_node->node_cellid, 0, vpid); rc = orte_ns.create_process_name(&name, rmaps_node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
/* save it in the daemon info */ /* save it in the daemon info */
dmn->cell = ras_node->node_cellid; dmn->cell = rmaps_node->cell;
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) { if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
@ -715,28 +702,14 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
if (mca_pls_rsh_component.debug) { if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: launching on node %s\n", opal_output(0, "pls:rsh: launching on node %s\n",
ras_node->node_name); rmaps_node->nodename);
} }
/* set the progress engine schedule for this node. /* We don't need to sense an oversubscribed condition and set the sched_yield
* if node_slots is set to zero, then we default to * for the node as we are only launching the daemons at this time. The daemons
* NOT being oversubscribed * are now smart enough to set the oversubscribed condition themselves when
* they launch the local procs.
*/ */
if (ras_node->node_slots > 0 &&
(orte_std_cntr_t)opal_list_get_size(&rmaps_node->node_procs) > ras_node->node_slots) {
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
ras_node->node_slots, opal_list_get_size(&rmaps_node->node_procs));
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("1");
} else {
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
free(argv[call_yield_index]);
argv[call_yield_index] = strdup("0");
}
/* Is this a local launch? /* Is this a local launch?
* *
@ -746,11 +719,11 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
* current nodename, which must be local. If that doesn't * current nodename, which must be local. If that doesn't
* match, check using ifislocal(). * match, check using ifislocal().
*/ */
if (0 == strcmp(ras_node->node_name, orte_system_info.nodename) || if (0 == strcmp(rmaps_node->nodename, orte_system_info.nodename) ||
opal_ifislocal(ras_node->node_name)) { opal_ifislocal(rmaps_node->nodename)) {
if (mca_pls_rsh_component.debug) { if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: %s is a LOCAL node\n", opal_output(0, "pls:rsh: %s is a LOCAL node\n",
ras_node->node_name); rmaps_node->nodename);
} }
exec_argv = &argv[local_exec_index]; exec_argv = &argv[local_exec_index];
exec_path = opal_path_findv(exec_argv[0], 0, environ, NULL); exec_path = opal_path_findv(exec_argv[0], 0, environ, NULL);
@ -847,7 +820,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
} else { } else {
if (mca_pls_rsh_component.debug) { if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: %s is a REMOTE node\n", opal_output(0, "pls:rsh: %s is a REMOTE node\n",
ras_node->node_name); rmaps_node->nodename);
} }
exec_argv = argv; exec_argv = argv;
exec_path = strdup(mca_pls_rsh_component.agent_path); exec_path = strdup(mca_pls_rsh_component.agent_path);
@ -951,8 +924,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
exit(-1); exit(-1);
} else { /* father */ } else { /* father */
rsh_daemon_info_t *daemon_info;
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock); OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
/* JJH Bug: /* JJH Bug:
* If we are in '--debug-daemons' we keep the ssh connection * If we are in '--debug-daemons' we keep the ssh connection
@ -974,11 +945,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
/* setup callback on sigchild - wait until setup above is complete /* setup callback on sigchild - wait until setup above is complete
* as the callback can occur in the call to orte_wait_cb * as the callback can occur in the call to orte_wait_cb
*/ */
daemon_info = OBJ_NEW(rsh_daemon_info_t); orte_wait_cb(pid, orte_pls_rsh_wait_daemon, dmn);
OBJ_RETAIN(ras_node);
daemon_info->node = ras_node;
daemon_info->jobid = jobid;
orte_wait_cb(pid, orte_pls_rsh_wait_daemon, daemon_info);
/* if required - add delay to avoid problems w/ X11 authentication */ /* if required - add delay to avoid problems w/ X11 authentication */
if (mca_pls_rsh_component.debug && mca_pls_rsh_component.delay) { if (mca_pls_rsh_component.debug && mca_pls_rsh_component.delay) {
@ -988,23 +955,14 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
} }
free(name); free(name);
} }
}
/* all done, so store the daemon info on the registry */ /* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) { if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&active_daemons))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
cleanup: cleanup:
while (NULL != (m_item = opal_list_remove_first(&mapping))) { /* OBJ_RELEASE(map); */
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&daemons);
if (NULL != lib_base) { if (NULL != lib_base) {
free(lib_base); free(lib_base);

Просмотреть файл

@ -59,7 +59,7 @@
#include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/smr/smr.h" #include "orte/mca/smr/smr.h"
#include "orte/mca/rmaps/base/rmaps_private.h" #include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/pls/pls.h" #include "orte/mca/pls/pls.h"
#include "orte/mca/pls/base/pls_private.h" #include "orte/mca/pls/base/pls_private.h"
@ -109,8 +109,8 @@ extern char **environ;
static int pls_slurm_launch_job(orte_jobid_t jobid) static int pls_slurm_launch_job(orte_jobid_t jobid)
{ {
opal_list_t nodes, mapping_list; orte_job_map_t *map;
opal_list_item_t *item, *item2; opal_list_item_t *item;
size_t num_nodes; size_t num_nodes;
orte_vpid_t vpid; orte_vpid_t vpid;
char *jobid_string; char *jobid_string;
@ -137,15 +137,13 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
*/ */
OBJ_CONSTRUCT(&daemons, opal_list_t); OBJ_CONSTRUCT(&daemons, opal_list_t);
/* Query the list of nodes allocated and mapped to this job. /* Query the map for this job.
* We need the entire mapping for a couple of reasons: * We need the entire mapping for a couple of reasons:
* - need the prefix to start with. * - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes * - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS * All other mapping responsibilities fall to orted in the fork PLS
*/ */
OBJ_CONSTRUCT(&nodes, opal_list_t); rc = orte_rmaps.get_job_map(&map, jobid);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
goto cleanup; goto cleanup;
} }
@ -153,7 +151,7 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
/* /*
* Allocate a range of vpids for the daemons. * Allocate a range of vpids for the daemons.
*/ */
num_nodes = opal_list_get_size(&nodes); num_nodes = opal_list_get_size(&map->nodes);
if (num_nodes == 0) { if (num_nodes == 0) {
return ORTE_ERR_BAD_PARAM; return ORTE_ERR_BAD_PARAM;
} }
@ -206,12 +204,12 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
nodelist_argv = NULL; nodelist_argv = NULL;
nodelist_argc = 0; nodelist_argc = 0;
for (item = opal_list_get_first(&nodes); for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&nodes); item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item; orte_mapped_node_t* node = (orte_mapped_node_t*)item;
opal_argv_append(&nodelist_argc, &nodelist_argv, node->node_name); opal_argv_append(&nodelist_argc, &nodelist_argv, node->nodename);
} }
nodelist_flat = opal_argv_join(nodelist_argv, ','); nodelist_flat = opal_argv_join(nodelist_argv, ',');
asprintf(&tmp, "--nodelist=%s", nodelist_flat); asprintf(&tmp, "--nodelist=%s", nodelist_flat);
@ -308,43 +306,15 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
} }
} }
/* Bookkeeping -- save the node names */ /* Copy the prefix-directory specified in the
cur_prefix = NULL;
for (item = opal_list_get_first(&nodes);
item != opal_list_get_end(&nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
opal_list_t map;
size_t num_processes;
OBJ_CONSTRUCT(&map, opal_list_t);
/* Get the mapping of this very node */
rc = orte_rmaps_base_get_node_map(orte_process_info.my_name->cellid,
jobid,
node->node_name,
&map);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* Copy the prefix-directory specified within the
corresponding app_context. If there are multiple, corresponding app_context. If there are multiple,
different prefix's in the app context, complain (i.e., only different prefix's in the app context, complain (i.e., only
allow one --prefix option for the entire slurm run -- we allow one --prefix option for the entire slurm run -- we
don't support different --prefix'es for different nodes in don't support different --prefix'es for different nodes in
the SLURM pls) */ the SLURM pls) */
num_processes = 0; cur_prefix = NULL;
for (item2 = opal_list_get_first(&map); for (i=0; i < map->num_apps; i++) {
item2 != opal_list_get_end(&map); char * app_prefix_dir = map->apps[i]->prefix_dir;
item2 = opal_list_get_next(item2)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item2;
char * app_prefix_dir = map->app->prefix_dir;
/* Increment the number of processes allocated to this node
* This allows us to accurately test for oversubscription */
num_processes += map->num_procs;
/* Check for already set cur_prefix_dir -- if different, /* Check for already set cur_prefix_dir -- if different,
complain */ complain */
if (NULL != app_prefix_dir) { if (NULL != app_prefix_dir) {
@ -358,7 +328,7 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
/* If not yet set, copy it; iff set, then it's the /* If not yet set, copy it; iff set, then it's the
same anyway */ same anyway */
if (NULL == cur_prefix) { if (NULL == cur_prefix) {
cur_prefix = strdup(map->app->prefix_dir); cur_prefix = strdup(app_prefix_dir);
if (mca_pls_slurm_component.debug) { if (mca_pls_slurm_component.debug) {
opal_output (0, "pls:slurm: Set prefix:%s", opal_output (0, "pls:slurm: Set prefix:%s",
cur_prefix); cur_prefix);
@ -367,21 +337,28 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
} }
} }
/* setup the daemon info for each node */
vpid = 0;
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
orte_mapped_node_t* node = (orte_mapped_node_t*)item;
/* record the daemons info for this node */ /* record the daemons info for this node */
dmn = OBJ_NEW(orte_pls_daemon_info_t); dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->cell = node->node_cellid; dmn->active_job = jobid;
dmn->nodename = strdup(node->node_name); dmn->cell = node->cell;
if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(dmn->name), node->node_cellid, 0, vpid))) { dmn->nodename = strdup(node->nodename);
if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(dmn->name), dmn->cell, 0, vpid))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
opal_list_append(&daemons, &dmn->super); opal_list_append(&daemons, &dmn->super);
vpid++; vpid++;
} }
/* store the daemon info on the registry */ /* store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) { if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
@ -390,29 +367,6 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
var = mca_base_param_environ_variable("seed", NULL, NULL); var = mca_base_param_environ_variable("seed", NULL, NULL);
opal_setenv(var, "0", true, &env); opal_setenv(var, "0", true, &env);
#if 0
/* JMS What to do for sched_yield? */
/* set the progress engine schedule for this node. if node_slots
is set to zero, then we default to NOT being oversubscribed */
if (node->node_slots > 0 &&
num_processes > node->node_slots) {
if (mca_pls_slurm_component.debug) {
opal_output(0, "pls:slurm: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
node->node_slots, num_processes);
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "1", true, &env);
} else {
if (mca_pls_slurm_component.debug) {
opal_output(0, "pls:slurm: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "0", true, &env);
}
free(var);
#endif
/* exec the daemon */ /* exec the daemon */
rc = pls_slurm_start_proc(argc, argv, env, cur_prefix); rc = pls_slurm_start_proc(argc, argv, env, cur_prefix);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
@ -424,16 +378,6 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
/* JMS: how do we catch when srun dies? */ /* JMS: how do we catch when srun dies? */
cleanup: cleanup:
while (NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
while (NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);
while (NULL != (item = opal_list_remove_first(&daemons))) { while (NULL != (item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(item); OBJ_RELEASE(item);
} }

Просмотреть файл

@ -58,12 +58,10 @@
#include "orte/mca/smr/smr.h" #include "orte/mca/smr/smr.h"
#include "orte/mca/gpr/gpr.h" #include "orte/mca/gpr/gpr.h"
#include "orte/mca/sds/base/base.h" #include "orte/mca/sds/base/base.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml.h"
#include "orte/mca/ns/ns.h" #include "orte/mca/ns/ns.h"
/* needs to be cleaned up for ORTE 2.0 */
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/pls/base/pls_private.h" #include "orte/mca/pls/base/pls_private.h"
#include "pls_tm.h" #include "pls_tm.h"
@ -118,14 +116,16 @@ extern char **environ;
static int pls_tm_launch_job(orte_jobid_t jobid) static int pls_tm_launch_job(orte_jobid_t jobid)
{ {
opal_list_t mapping; orte_job_map_t *map;
opal_list_item_t *m_item, *n_item; opal_list_item_t *item;
size_t num_nodes; size_t num_nodes;
orte_vpid_t vpid; orte_vpid_t vpid;
int node_name_index; int node_name_index;
int proc_name_index; int proc_name_index;
char *jobid_string; char *jobid_string;
char *uri, *param; char *uri, *param;
char **env;
char *var;
char **argv; char **argv;
int argc; int argc;
int rc; int rc;
@ -139,24 +139,17 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
opal_list_t daemons; opal_list_t daemons;
orte_pls_daemon_info_t *dmn; orte_pls_daemon_info_t *dmn;
/* Query the list of nodes allocated and mapped to this job. /* Query the map for this job.
* We need the entire mapping for a couple of reasons: * We need the entire mapping for a couple of reasons:
* - need the prefix to start with. * - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes * - need to know if we are launching on a subset of the allocated nodes
*/ */
OBJ_CONSTRUCT(&mapping, opal_list_t); rc = orte_rmaps.get_job_map(&map, jobid);
rc = orte_rmaps_base_get_map(jobid, &mapping);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
goto cleanup; goto cleanup;
} }
num_nodes = 0; num_nodes = opal_list_get_size(&map->nodes);
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
/* /*
* Allocate a range of vpids for the daemons. * Allocate a range of vpids for the daemons.
@ -286,31 +279,25 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
lib_base = opal_basename(OPAL_LIBDIR); lib_base = opal_basename(OPAL_LIBDIR);
bin_base = opal_basename(OPAL_BINDIR); bin_base = opal_basename(OPAL_BINDIR);
/*
* iterate through each of the contexts
*/
for (m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
char** env;
char* var;
/* setup environment */ /* setup environment */
env = opal_argv_copy(environ); env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL); var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env); opal_setenv(var, "0", true, &env);
/* If we have a prefix, then modify the PATH and /* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. */ LD_LIBRARY_PATH environment variables. We only allow
if (NULL != map->app->prefix_dir) { a single prefix to be specified. Since there will
always be at least one app_context, we take it from
there
*/
if (NULL != map->apps[0]->prefix_dir) {
char *newenv; char *newenv;
for (i = 0; NULL != env && NULL != env[i]; ++i) { for (i = 0; NULL != env && NULL != env[i]; ++i) {
/* Reset PATH */ /* Reset PATH */
if (0 == strncmp("PATH=", env[i], 5)) { if (0 == strncmp("PATH=", env[i], 5)) {
asprintf(&newenv, "%s/%s:%s", asprintf(&newenv, "%s/%s:%s",
map->app->prefix_dir, bin_base, env[i] + 5); map->apps[0]->prefix_dir, bin_base, env[i] + 5);
if (mca_pls_tm_component.debug) { if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: resetting PATH: %s", opal_output(0, "pls:tm: resetting PATH: %s",
newenv); newenv);
@ -322,7 +309,7 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
/* Reset LD_LIBRARY_PATH */ /* Reset LD_LIBRARY_PATH */
else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) { else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) {
asprintf(&newenv, "%s/%s:%s", asprintf(&newenv, "%s/%s:%s",
map->app->prefix_dir, lib_base, env[i] + 16); map->apps[0]->prefix_dir, lib_base, env[i] + 16);
if (mca_pls_tm_component.debug) { if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: resetting LD_LIBRARY_PATH: %s", opal_output(0, "pls:tm: resetting LD_LIBRARY_PATH: %s",
newenv); newenv);
@ -347,19 +334,13 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
/* Iterate through each of the nodes and spin /* Iterate through each of the nodes and spin
* up a daemon. * up a daemon.
*/ */
for (n_item = opal_list_get_first(&map->nodes); for (item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes); item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) { item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item; orte_mapped_node_t* node = (orte_mapped_node_t*)item;
orte_ras_node_t* node = rmaps_node->node;
orte_process_name_t* name; orte_process_name_t* name;
char* name_string; char* name_string;
/* already launched on this node */
if (0 != node->node_launched++) {
continue;
}
/* new daemon - setup to record its info */ /* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t); dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid; dmn->active_job = jobid;
@ -367,14 +348,14 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
/* setup node name */ /* setup node name */
free(argv[node_name_index]); free(argv[node_name_index]);
argv[node_name_index] = strdup(node->node_name); argv[node_name_index] = strdup(node->nodename);
/* record the node name in the daemon struct */ /* record the node name in the daemon struct */
dmn->cell = node->node_cellid; dmn->cell = node->cell;
dmn->nodename = strdup(node->node_name); dmn->nodename = strdup(node->nodename);
/* initialize daemons process name */ /* initialize daemons process name */
rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid); rc = orte_ns.create_process_name(&name, node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
@ -390,7 +371,7 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
if (mca_pls_tm_component.debug || if (mca_pls_tm_component.debug ||
mca_pls_tm_component.verbose) { mca_pls_tm_component.verbose) {
opal_output(0, "pls:tm: launching on node %s", opal_output(0, "pls:tm: launching on node %s",
node->node_name); node->nodename);
} }
/* setup process name */ /* setup process name */
@ -402,28 +383,6 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
free(argv[proc_name_index]); free(argv[proc_name_index]);
argv[proc_name_index] = strdup(name_string); argv[proc_name_index] = strdup(name_string);
/* set the progress engine schedule for this node.
* if node_slots is set to zero, then we default to
* NOT being oversubscribed
*/
if (node->node_slots > 0 &&
(orte_std_cntr_t)opal_list_get_size(&rmaps_node->node_procs) > node->node_slots) {
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
node->node_slots,
opal_list_get_size(&rmaps_node->node_procs));
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "1", true, &env);
} else {
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "0", true, &env);
}
free(var);
/* exec the daemon */ /* exec the daemon */
if (mca_pls_tm_component.debug) { if (mca_pls_tm_component.debug) {
param = opal_argv_join(argv, ' '); param = opal_argv_join(argv, ' ');
@ -433,7 +392,7 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
} }
} }
rc = pls_tm_start_proc(node->node_name, argc, argv, env, rc = pls_tm_start_proc(node->nodename, argc, argv, env,
tm_task_ids + launched, tm_task_ids + launched,
tm_events + launched); tm_events + launched);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
@ -447,13 +406,12 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
/* Allow some progress to occur */ /* Allow some progress to occur */
opal_event_loop(OPAL_EVLOOP_NONBLOCK); opal_event_loop(OPAL_EVLOOP_NONBLOCK);
} }
}
if (mca_pls_tm_component.debug) { if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm:launch: finished spawning orteds\n"); opal_output(0, "pls:tm:launch: finished spawning orteds\n");
} }
/* all done, so store the daemon info on the registry */ /* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) { if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
@ -478,10 +436,6 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
free(tm_task_ids); free(tm_task_ids);
} }
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
if (NULL != lib_base) { if (NULL != lib_base) {
free(lib_base); free(lib_base);
} }
@ -490,8 +444,8 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
} }
/* deconstruct the daemon list */ /* deconstruct the daemon list */
while (NULL != (m_item = opal_list_remove_first(&daemons))) { while (NULL != (item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item); OBJ_RELEASE(item);
} }
OBJ_DESTRUCT(&daemons); OBJ_DESTRUCT(&daemons);

Просмотреть файл

@ -30,7 +30,7 @@
#import "orte/mca/pls/pls.h" #import "orte/mca/pls/pls.h"
#import "orte/mca/errmgr/errmgr.h" #import "orte/mca/errmgr/errmgr.h"
#import "orte/mca/ras/ras_types.h" #import "orte/mca/ras/ras_types.h"
#import "orte/mca/rmaps/base/rmaps_private.h" #import "orte/mca/rmaps/rmaps.h"
#import "orte/mca/smr/smr.h" #import "orte/mca/smr/smr.h"
#import "pls_xgrid_client.h" #import "pls_xgrid_client.h"
@ -229,8 +229,8 @@ char **environ;
-(int) launchJob:(orte_jobid_t) jobid -(int) launchJob:(orte_jobid_t) jobid
{ {
opal_list_t mapping; orte_job_map_t *map;
opal_list_item_t *m_item, *n_item; opal_list_item_t *item;
size_t num_nodes; size_t num_nodes;
orte_vpid_t vpid; orte_vpid_t vpid;
int rc, i = 0; int rc, i = 0;
@ -239,24 +239,17 @@ char **environ;
char *orted_path; char *orted_path;
char *nsuri = NULL, *gpruri = NULL; char *nsuri = NULL, *gpruri = NULL;
/* Query the list of nodes allocated and mapped to this job. /* Query the map for this job.
* We need the entire mapping for a couple of reasons: * We need the entire mapping for a couple of reasons:
* - need the prefix to start with. * - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes * - need to know if we are launching on a subset of the allocated nodes
*/ */
OBJ_CONSTRUCT(&mapping, opal_list_t); rc = orte_rmaps.get_job_map(&map, jobid);
rc = orte_rmaps_base_get_map(jobid, &mapping);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
goto cleanup; goto cleanup;
} }
num_nodes = 0; num_nodes = opal_list_get_size(&map->nodes);
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}
/* /*
* Allocate a range of vpids for the daemons. * Allocate a range of vpids for the daemons.
@ -300,41 +293,27 @@ char **environ;
/* build up the array of task specifications */ /* build up the array of task specifications */
NSMutableDictionary *taskSpecifications = [NSMutableDictionary dictionary]; NSMutableDictionary *taskSpecifications = [NSMutableDictionary dictionary];
/*
* iterate through each of the contexts
*/
for (m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
/* Iterate through each of the nodes and spin /* Iterate through each of the nodes and spin
* up a daemon. * up a daemon.
*/ */
for (n_item = opal_list_get_first(&map->nodes); for (item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes); item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) { item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item; orte_mapped_node_t* rmaps_node = (orte_mapped_node_t*)item;
orte_ras_node_t* node = rmaps_node->node;
orte_process_name_t* name; orte_process_name_t* name;
char* name_string; char* name_string;
/* already launched on this node */
if (0 != node->node_launched++) {
continue;
}
/* new daemon - setup to record its info */ /* new daemon - setup to record its info */
dmn = OBJ_NEW(orte_pls_daemon_info_t); dmn = OBJ_NEW(orte_pls_daemon_info_t);
dmn->active_job = jobid; dmn->active_job = jobid;
opal_list_append(&daemons, &dmn->super); opal_list_append(&daemons, &dmn->super);
/* record the node name in the daemon struct */ /* record the node name in the daemon struct */
dmn->cell = node->node_cellid; dmn->cell = node->cell;
dmn->nodename = strdup(node->node_name); dmn->nodename = strdup(node->nodename);
/* initialize daemons process name */ /* initialize daemons process name */
rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid); rc = orte_ns.create_process_name(&name, node->cell, 0, vpid);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
@ -349,7 +328,7 @@ char **environ;
/* setup per-node options */ /* setup per-node options */
opal_output_verbose(1, orte_pls_base.pls_output, opal_output_verbose(1, orte_pls_base.pls_output,
"orte:pls:xgrid: launching on node %s", "orte:pls:xgrid: launching on node %s",
node->node_name); node->nodename);
/* setup process name */ /* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name); rc = orte_ns.get_proc_name_string(&name_string, name);
@ -367,7 +346,7 @@ char **environ;
@"--bootproxy", [NSString stringWithFormat: @"%d", jobid], @"--bootproxy", [NSString stringWithFormat: @"%d", jobid],
@"--name", [NSString stringWithCString: name_string], @"--name", [NSString stringWithCString: name_string],
@"--num_procs", [NSString stringWithFormat: @"%d", 1], @"--num_procs", [NSString stringWithFormat: @"%d", 1],
@"--nodename", [NSString stringWithCString: node->node_name], @"--nodename", [NSString stringWithCString: node->nodename],
@"--nsreplica", [NSString stringWithCString: nsuri], @"--nsreplica", [NSString stringWithCString: nsuri],
@"--gprreplica", [NSString stringWithCString: gpruri], @"--gprreplica", [NSString stringWithCString: gpruri],
nil]; nil];
@ -378,7 +357,6 @@ char **environ;
vpid++; i++; vpid++; i++;
} }
}
/* job specification */ /* job specification */
NSMutableDictionary *jobSpecification = [NSMutableDictionary dictionary]; NSMutableDictionary *jobSpecification = [NSMutableDictionary dictionary];
@ -419,7 +397,7 @@ char **environ;
forKey: [NSString stringWithFormat: @"%d", jobid]]; forKey: [NSString stringWithFormat: @"%d", jobid]];
/* all done, so store the daemon info on the registry */ /* all done, so store the daemon info on the registry */
if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) { if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
@ -427,11 +405,6 @@ cleanup:
if (NULL != nsuri) free(nsuri); if (NULL != nsuri) free(nsuri);
if (NULL != gpruri) free(gpruri); if (NULL != gpruri) free(gpruri);
while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&mapping);
/* deconstruct the daemon list */ /* deconstruct the daemon list */
while (NULL != (m_item = opal_list_remove_first(&daemons))) { while (NULL != (m_item = opal_list_remove_first(&daemons))) {
OBJ_RELEASE(m_item); OBJ_RELEASE(m_item);

Просмотреть файл

@ -20,14 +20,14 @@ dist_pkgdata_DATA = base/help-orte-rmaps-base.txt
headers += \ headers += \
base/base.h \ base/base.h \
base/rmaps_class_instances.h \
base/rmaps_private.h base/rmaps_private.h
libmca_rmaps_la_SOURCES += \ libmca_rmaps_la_SOURCES += \
base/rmaps_base_close.c \ base/rmaps_base_close.c \
base/rmaps_base_map.c \ base/rmaps_base_registry_fns.c \
base/rmaps_base_map_job.c \ base/rmaps_base_map_job.c \
base/rmaps_base_node.c \ base/rmaps_base_support_fns.c \
base/rmaps_base_no_ops.c \
base/rmaps_base_open.c \ base/rmaps_base_open.c \
base/rmaps_base_receive.c \ base/rmaps_base_receive.c \
base/rmaps_base_find_avail.c \ base/rmaps_base_find_avail.c \

Просмотреть файл

@ -29,14 +29,14 @@
/* /*
* JOB_MAP * JOB_MAP
*/ */
int orte_rmaps_base_compare_map(orte_rmaps_base_map_t *value1, orte_rmaps_base_map_t *value2, orte_data_type_t type) int orte_rmaps_base_compare_map(orte_job_map_t *value1, orte_job_map_t *value2, orte_data_type_t type)
{ {
return ORTE_EQUAL; return ORTE_EQUAL;
} }
/* MAPPED_PROC */ /* MAPPED_PROC */
int orte_rmaps_base_compare_mapped_proc(orte_rmaps_base_proc_t *value1, orte_rmaps_base_proc_t *value2, orte_data_type_t type) int orte_rmaps_base_compare_mapped_proc(orte_mapped_proc_t *value1, orte_mapped_proc_t *value2, orte_data_type_t type)
{ {
return ORTE_EQUAL; return ORTE_EQUAL;
} }
@ -44,7 +44,7 @@ int orte_rmaps_base_compare_mapped_proc(orte_rmaps_base_proc_t *value1, orte_rma
/* MAPPED_NODE */ /* MAPPED_NODE */
int orte_rmaps_base_compare_mapped_node(orte_rmaps_base_node_t *value1, orte_rmaps_base_node_t *value2, orte_data_type_t type) int orte_rmaps_base_compare_mapped_node(orte_mapped_node_t *value1, orte_mapped_node_t *value2, orte_data_type_t type)
{ {
return ORTE_EQUAL; return ORTE_EQUAL;
} }

Просмотреть файл

@ -34,12 +34,12 @@
/* /*
* JOB_MAP * JOB_MAP
*/ */
int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t *src, orte_data_type_t type) int orte_rmaps_base_copy_map(orte_job_map_t **dest, orte_job_map_t *src, orte_data_type_t type)
{ {
orte_std_cntr_t i; orte_std_cntr_t i;
int rc; int rc;
opal_list_item_t *item; opal_list_item_t *item;
orte_rmaps_base_node_t *srcnode, *nodeptr; orte_mapped_node_t *srcnode, *nodeptr;
if (NULL == src) { if (NULL == src) {
*dest = NULL; *dest = NULL;
@ -47,34 +47,34 @@ int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t
} }
/* create the new object */ /* create the new object */
*dest = OBJ_NEW(orte_rmaps_base_map_t); *dest = OBJ_NEW(orte_job_map_t);
if (NULL == *dest) { if (NULL == *dest) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* copy data into it */ /* copy data into it */
(*dest)->app = src->app; (*dest)->job = src->job;
(*dest)->num_apps = src->num_apps;
(*dest)->procs = (orte_rmaps_base_proc_t**)malloc(src->num_procs * sizeof(orte_rmaps_base_proc_t)); (*dest)->apps = (orte_app_context_t**)malloc(src->num_apps * sizeof(orte_app_context_t*));
if (NULL == (*dest)->procs) { if (NULL == (*dest)->apps) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(*dest); OBJ_RELEASE(*dest);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
for (i=0; i < src->num_procs; i++) { for (i=0; i < src->num_apps; i++) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_proc(&((*dest)->procs[i]), src->procs[i], ORTE_MAPPED_PROC))) { if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((*dest)->apps[i]), src->apps[i], ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest); OBJ_RELEASE(*dest);
return rc; return rc;
} }
} }
(*dest)->num_procs = src->num_procs;
for (item = opal_list_get_first(&(src->nodes)); for (item = opal_list_get_first(&(src->nodes));
item != opal_list_get_end(&(src->nodes)); item != opal_list_get_end(&(src->nodes));
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
srcnode = (orte_rmaps_base_node_t*)item; srcnode = (orte_mapped_node_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_node(&nodeptr, srcnode, ORTE_MAPPED_NODE))) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_node(&nodeptr, srcnode, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest); OBJ_RELEASE(*dest);
@ -89,52 +89,40 @@ int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t
/* /*
* MAPPED_PROC * MAPPED_PROC
*/ */
int orte_rmaps_base_copy_mapped_proc(orte_rmaps_base_proc_t **dest, orte_rmaps_base_proc_t *src, orte_data_type_t type) int orte_rmaps_base_copy_mapped_proc(orte_mapped_proc_t **dest, orte_mapped_proc_t *src, orte_data_type_t type)
{ {
int rc;
if (NULL == src) { if (NULL == src) {
*dest = NULL; *dest = NULL;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/* create the new object */ /* create the new object */
*dest = OBJ_NEW(orte_rmaps_base_proc_t); *dest = OBJ_NEW(orte_mapped_proc_t);
if (NULL == *dest) { if (NULL == *dest) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* copy data into it */ /* copy data into it */
if (NULL != src->app) { (*dest)->name = src->name;
(*dest)->app = strdup(src->app);
}
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_node(&((*dest)->proc_node), src->proc_node, ORTE_MAPPED_NODE))) { (*dest)->rank = src->rank;
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest);
return rc;
}
(*dest)->proc_name = src->proc_name; (*dest)->app_idx = src->app_idx;
(*dest)->proc_rank = src->proc_rank;
(*dest)->pid = src->pid; (*dest)->pid = src->pid;
(*dest)->local_pid = src->local_pid;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/* /*
* MAPPED_NODE * MAPPED_NODE
*/ */
int orte_rmaps_base_copy_mapped_node(orte_rmaps_base_node_t **dest, orte_rmaps_base_node_t *src, orte_data_type_t type) int orte_rmaps_base_copy_mapped_node(orte_mapped_node_t **dest, orte_mapped_node_t *src, orte_data_type_t type)
{ {
int rc; int rc;
opal_list_item_t *item; opal_list_item_t *item;
orte_rmaps_base_proc_t *srcproc, *procptr; orte_mapped_proc_t *srcproc, *procptr;
if (NULL == src) { if (NULL == src) {
*dest = NULL; *dest = NULL;
@ -142,29 +130,43 @@ int orte_rmaps_base_copy_mapped_node(orte_rmaps_base_node_t **dest, orte_rmaps_b
} }
/* create the new object */ /* create the new object */
*dest = OBJ_NEW(orte_rmaps_base_node_t); *dest = OBJ_NEW(orte_mapped_node_t);
if (NULL == *dest) { if (NULL == *dest) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* copy data into it */ /* copy data into it */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((*dest)->node), src->node, ORTE_RAS_NODE))) { (*dest)->cell = src->cell;
if (NULL != src->nodename) {
(*dest)->nodename = strdup(src->nodename);
}
if (NULL != src->username) {
(*dest)->username = strdup(src->username);
}
if (NULL != src->daemon) {
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&((*dest)->daemon), src->daemon, ORTE_NAME))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest); OBJ_RELEASE(*dest);
return rc; return rc;
} }
}
for (item = opal_list_get_first(&(src->node_procs)); (*dest)->oversubscribed = src->oversubscribed;
item != opal_list_get_end(&(src->node_procs));
for (item = opal_list_get_first(&(src->procs));
item != opal_list_get_end(&(src->procs));
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
srcproc = (orte_rmaps_base_proc_t*)item; srcproc = (orte_mapped_proc_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_proc(&procptr, srcproc, ORTE_MAPPED_PROC))) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_copy_mapped_proc(&procptr, srcproc, ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(*dest); OBJ_RELEASE(*dest);
return rc; return rc;
} }
opal_list_append(&((*dest)->node_procs), &procptr->super); opal_list_append(&((*dest)->procs), &procptr->super);
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;

Просмотреть файл

@ -38,29 +38,28 @@ int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
{ {
int rc; int rc;
orte_std_cntr_t i, num_nodes; orte_std_cntr_t i, num_nodes;
orte_rmaps_base_map_t **maps; orte_job_map_t **maps;
opal_list_item_t *item; opal_list_item_t *item;
orte_rmaps_base_node_t *srcnode; orte_mapped_node_t *srcnode;
/* array of pointers to orte_rmaps_base_map_t objects - need to pack the objects a set of fields at a time */ /* array of pointers to orte_job_map_t objects - need to pack the objects a set of fields at a time */
maps = (orte_rmaps_base_map_t**) src; maps = (orte_job_map_t**) src;
for (i=0; i < num_vals; i++) { for (i=0; i < num_vals; i++) {
/* pack the app_context */ /* pack the jobid this map is for */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, maps[i]->app, 1, ORTE_APP_CONTEXT))) { if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->job), 1, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the number of procs */ /* pack the number of app_contexts */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->num_procs), 1, ORTE_STD_CNTR))) { if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->num_apps), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the procs array */ /* pack the app_contexts */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(maps[i]->procs), if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, maps[i]->apps, maps[i]->num_apps, ORTE_APP_CONTEXT))) {
maps[i]->num_procs, ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
@ -77,7 +76,7 @@ int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
for (item = opal_list_get_first(&(maps[i]->nodes)); for (item = opal_list_get_first(&(maps[i]->nodes));
item != opal_list_get_end(&(maps[i]->nodes)); item != opal_list_get_end(&(maps[i]->nodes));
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
srcnode = (orte_rmaps_base_node_t*)item; srcnode = (orte_mapped_node_t*)item;
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)srcnode, if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)srcnode,
1, ORTE_MAPPED_NODE))) { 1, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -99,45 +98,33 @@ int orte_rmaps_base_pack_mapped_proc(orte_buffer_t *buffer, void *src,
{ {
int rc; int rc;
orte_std_cntr_t i; orte_std_cntr_t i;
orte_rmaps_base_proc_t **procs; orte_mapped_proc_t **procs;
/* array of pointers to orte_rmaps_base_proc_t objects - need to pack the objects a set of fields at a time */ /* array of pointers to orte_mapped_proc_t objects - need to pack the objects a set of fields at a time */
procs = (orte_rmaps_base_proc_t**) src; procs = (orte_mapped_proc_t**) src;
for (i=0; i < num_vals; i++) { for (i=0; i < num_vals; i++) {
/* pack the app */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, procs[i]->app, 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the proc_node */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, procs[i]->proc_node, 1, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the proc name */ /* pack the proc name */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(&(procs[i]->proc_name)), if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(&(procs[i]->name)),
1, ORTE_NAME))) { 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the rank */ /* pack the rank */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->proc_rank), 1, ORTE_STD_CNTR))) { if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->rank), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the pls-pid */ /* pack the pid */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->pid), 1, ORTE_PID))) { if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->pid), 1, ORTE_PID))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the local pid */ /* pack the app_idx */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->local_pid), 1, ORTE_PID))) { if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(procs[i]->app_idx), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
@ -155,22 +142,46 @@ int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
{ {
int rc; int rc;
orte_std_cntr_t i, num_procs; orte_std_cntr_t i, num_procs;
orte_rmaps_base_node_t **nodes; orte_mapped_node_t **nodes;
opal_list_item_t *item; opal_list_item_t *item;
orte_rmaps_base_proc_t *srcproc; orte_mapped_proc_t *srcproc;
/* array of pointers to orte_rmaps_base_node_t objects - need to pack the objects a set of fields at a time */ /* array of pointers to orte_mapped_node_t objects - need to pack the objects a set of fields at a time */
nodes = (orte_rmaps_base_node_t**) src; nodes = (orte_mapped_node_t**) src;
for (i=0; i < num_vals; i++) { for (i=0; i < num_vals; i++) {
/* pack the node object */ /* pack the cellid */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, nodes[i]->node, 1, ORTE_RAS_NODE))) { if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->cell), 1, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the nodename */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->nodename), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the username */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->username), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the daemon's name */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->daemon), 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the oversubscribed flag */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->oversubscribed), 1, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* pack the number of procs */ /* pack the number of procs */
num_procs = (orte_std_cntr_t)opal_list_get_size(&(nodes[i]->node_procs)); num_procs = (orte_std_cntr_t)opal_list_get_size(&(nodes[i]->procs));
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &num_procs, 1, ORTE_STD_CNTR))) { if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &num_procs, 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
@ -178,10 +189,10 @@ int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
/* pack the procs list */ /* pack the procs list */
if (0 < num_procs) { if (0 < num_procs) {
for (item = opal_list_get_first(&(nodes[i]->node_procs)); for (item = opal_list_get_first(&(nodes[i]->procs));
item != opal_list_get_end(&(nodes[i]->node_procs)); item != opal_list_get_end(&(nodes[i]->procs));
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
srcproc = (orte_rmaps_base_proc_t*)item; srcproc = (orte_mapped_proc_t*)item;
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)srcproc, if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)srcproc,
1, ORTE_MAPPED_PROC))) { 1, ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -34,10 +34,10 @@
/* /*
* JOB_MAP * JOB_MAP
*/ */
int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t *src, orte_data_type_t type) int orte_rmaps_base_print_map(char **output, char *prefix, orte_job_map_t *src, orte_data_type_t type)
{ {
char *tmp, *tmp2, *tmp3, *pfx, *pfx2; char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
orte_rmaps_base_node_t *srcnode; orte_mapped_node_t *srcnode;
orte_std_cntr_t i, num_nodes; orte_std_cntr_t i, num_nodes;
opal_list_item_t *item; opal_list_item_t *item;
int rc; int rc;
@ -52,32 +52,22 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t
asprintf(&pfx2, "%s", prefix); asprintf(&pfx2, "%s", prefix);
} }
asprintf(&tmp, "%sMap for app_context:", pfx2); asprintf(&tmp, "%sMap for job: %ld\tNum app_contexts: %ld", pfx2, (long)src->job, (long)src->num_apps);
asprintf(&pfx, "%s\t", pfx2); asprintf(&pfx, "%s\t", pfx2);
free(pfx2); free(pfx2);
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->app, ORTE_APP_CONTEXT))) { for (i=0; i < src->num_apps; i++) {
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->apps[i], ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(pfx); free(pfx);
free(tmp); free(tmp);
return rc; return rc;
} }
asprintf(&tmp3, "%s\n%s\n%sNum elements in procs array: %ld", tmp, tmp2, pfx, (long)src->num_procs); asprintf(&tmp3, "%s\n%s", tmp, tmp2);
free(tmp); free(tmp);
free(tmp2); free(tmp2);
tmp = tmp3;
for (i=0; i < src->num_procs; i++) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_proc(&tmp, pfx, src->procs[i], ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc);
free(pfx);
free(tmp3);
return rc;
}
asprintf(&tmp2, "%s\n%s", tmp3, tmp);
free(tmp);
free(tmp3);
tmp3 = tmp2;
} }
num_nodes = (orte_std_cntr_t)opal_list_get_size(&(src->nodes)); num_nodes = (orte_std_cntr_t)opal_list_get_size(&(src->nodes));
@ -86,7 +76,7 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t
for (item = opal_list_get_first(&(src->nodes)); for (item = opal_list_get_first(&(src->nodes));
item != opal_list_get_end(&(src->nodes)); item != opal_list_get_end(&(src->nodes));
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
srcnode = (orte_rmaps_base_node_t*)item; srcnode = (orte_mapped_node_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_node(&tmp2, pfx, srcnode, ORTE_MAPPED_NODE))) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_node(&tmp2, pfx, srcnode, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(pfx); free(pfx);
@ -110,7 +100,7 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t
/* /*
* MAPPED_PROC * MAPPED_PROC
*/ */
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_base_proc_t *src, orte_data_type_t type) int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_mapped_proc_t *src, orte_data_type_t type)
{ {
char *tmp, *tmp2, *tmp3, *pfx, *pfx2; char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
int rc; int rc;
@ -125,35 +115,18 @@ int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_ba
asprintf(&pfx2, "%s", prefix); asprintf(&pfx2, "%s", prefix);
} }
asprintf(&tmp, "%sMapped proc:", pfx2); asprintf(&tmp3, "%sMapped proc:\n%s\tProc Name:", pfx2, pfx2);
asprintf(&pfx, "%s\t", pfx2); asprintf(&pfx, "%s\t", pfx2);
if (NULL != src->app) { if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, &(src->name), ORTE_NAME))) {
asprintf(&tmp2, "%s\n%sApp name: %s", tmp, pfx, src->app);
} else {
asprintf(&tmp2, "%s\n%sApplication has NULL name", tmp, pfx);
}
free(tmp);
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_node(&tmp, pfx, src->proc_node, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
free(pfx);
free(tmp2);
return rc;
}
asprintf(&tmp3, "%s\n%s\n%s\n%sProc Name:", tmp2, pfx, tmp, pfx);
free(tmp2);
free(tmp);
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, &(src->proc_name), ORTE_NAME))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(pfx); free(pfx);
free(tmp3); free(tmp3);
return rc; return rc;
} }
asprintf(&tmp, "%s\n%s\n%sProc Rank: %ld\tPLS pid: %ld\tLocal PID: %ld\n", tmp3, tmp2, pfx, asprintf(&tmp, "%s\n%s\n%sProc Rank: %ld\tProc PID: %ld\tApp_context index: %ld\n", tmp3, tmp2, pfx,
(long)src->proc_rank, (long)src->pid, (long)src->local_pid); (long)src->rank, (long)src->pid, (long)src->app_idx);
free(tmp2); free(tmp2);
free(tmp3); free(tmp3);
@ -168,15 +141,13 @@ int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_ba
/* /*
* MAPPED_NODE * MAPPED_NODE
*/ */
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_base_node_t *src, orte_data_type_t type) int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_node_t *src, orte_data_type_t type)
{ {
int rc; int rc;
char *tmp, *tmp2, *tmp3, *pfx, *pfx2; char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
orte_std_cntr_t num_procs; orte_std_cntr_t num_procs;
#if 0
opal_list_item_t *item; opal_list_item_t *item;
orte_rmaps_base_proc_t *srcproc; orte_mapped_proc_t *srcproc;
#endif
/* set default result */ /* set default result */
*output = NULL; *output = NULL;
@ -188,27 +159,30 @@ int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_ba
asprintf(&pfx2, "%s", prefix); asprintf(&pfx2, "%s", prefix);
} }
asprintf(&tmp, "%sMapped node:", pfx2); asprintf(&tmp, "%sMapped node:\n%s\tCell: %ld\tNodename: %s\tUsername: %s\n%s\tDaemon name:", pfx2, pfx2,
(long)src->cell, (NULL == src->nodename ? "NULL" : src->nodename),
(NULL == src->username ? "NULL" : src->username), pfx2);
asprintf(&pfx, "%s\t", pfx2); asprintf(&pfx, "%s\t", pfx2);
free(pfx2); free(pfx2);
if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->node, ORTE_RAS_NODE))) { if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp2, pfx, src->daemon, ORTE_NAME))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(pfx); free(pfx);
free(tmp); free(tmp);
return rc; return rc;
} }
num_procs = (orte_std_cntr_t)opal_list_get_size(&(src->node_procs)); num_procs = (orte_std_cntr_t)opal_list_get_size(&(src->procs));
asprintf(&tmp3, "%s\n%s\n%sNum elements in procs list: %ld", tmp, tmp2, pfx, (long)num_procs); asprintf(&tmp3, "%s\n\t%s\n%sOversubscribed: %s\tNum elements in procs list: %ld", tmp, tmp2, pfx,
(src->oversubscribed ? "True" : "False"), (long)num_procs);
free(tmp); free(tmp);
free(tmp2); free(tmp2);
#if 0
for (item = opal_list_get_first(&(src->node_procs)); for (item = opal_list_get_first(&(src->procs));
item != opal_list_get_end(&(src->node_procs)); item != opal_list_get_end(&(src->procs));
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
srcproc = (orte_rmaps_base_proc_t*)item; srcproc = (orte_mapped_proc_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_proc(&tmp2, pfx, srcproc, ORTE_MAPPED_PROC))) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_print_mapped_proc(&tmp2, pfx, srcproc, ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(pfx); free(pfx);
@ -220,7 +194,7 @@ int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_ba
free(tmp2); free(tmp2);
tmp3 = tmp; tmp3 = tmp;
} }
#endif
/* set the return */ /* set the return */
*output = tmp3; *output = tmp3;

Просмотреть файл

@ -32,10 +32,10 @@
/* /*
* JOB_MAP * JOB_MAP
*/ */
int orte_rmaps_base_size_map(size_t *size, orte_rmaps_base_map_t *src, orte_data_type_t type) int orte_rmaps_base_size_map(size_t *size, orte_job_map_t *src, orte_data_type_t type)
{ {
/* account for the object itself */ /* account for the object itself */
*size = sizeof(orte_rmaps_base_map_t); *size = sizeof(orte_job_map_t);
/* if src is NULL, then that's all we wanted */ /* if src is NULL, then that's all we wanted */
if (NULL == src) return ORTE_SUCCESS; if (NULL == src) return ORTE_SUCCESS;
@ -46,10 +46,10 @@ int orte_rmaps_base_size_map(size_t *size, orte_rmaps_base_map_t *src, orte_data
/* /*
* MAPPED_PROC * MAPPED_PROC
*/ */
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_rmaps_base_proc_t *src, orte_data_type_t type) int orte_rmaps_base_size_mapped_proc(size_t *size, orte_mapped_proc_t *src, orte_data_type_t type)
{ {
/* account for the object itself */ /* account for the object itself */
*size = sizeof(orte_rmaps_base_proc_t); *size = sizeof(orte_mapped_proc_t);
/* if src is NULL, then that's all we wanted */ /* if src is NULL, then that's all we wanted */
if (NULL == src) return ORTE_SUCCESS; if (NULL == src) return ORTE_SUCCESS;
@ -60,10 +60,10 @@ int orte_rmaps_base_size_mapped_proc(size_t *size, orte_rmaps_base_proc_t *src,
/* /*
* MAPPED_NODE * MAPPED_NODE
*/ */
int orte_rmaps_base_size_mapped_node(size_t *size, orte_rmaps_base_node_t *src, orte_data_type_t type) int orte_rmaps_base_size_mapped_node(size_t *size, orte_mapped_node_t *src, orte_data_type_t type)
{ {
/* account for the object itself */ /* account for the object itself */
*size = sizeof(orte_rmaps_base_node_t); *size = sizeof(orte_mapped_node_t);
/* if src is NULL, then that's all we wanted */ /* if src is NULL, then that's all we wanted */
if (NULL == src) return ORTE_SUCCESS; if (NULL == src) return ORTE_SUCCESS;

Просмотреть файл

@ -40,49 +40,48 @@ int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest,
{ {
int rc; int rc;
orte_std_cntr_t i, j, n, num_nodes; orte_std_cntr_t i, j, n, num_nodes;
orte_rmaps_base_map_t **maps; orte_job_map_t **maps;
orte_rmaps_base_node_t *node; orte_mapped_node_t *node;
/* unpack into array of orte_rmaps_base_map_t objects */ /* unpack into array of orte_job_map_t objects */
maps = (orte_rmaps_base_map_t**) dest; maps = (orte_job_map_t**) dest;
for (i=0; i < *num_vals; i++) { for (i=0; i < *num_vals; i++) {
/* create the orte_rmaps_base_map_t object */ /* create the orte_rmaps_base_map_t object */
maps[i] = OBJ_NEW(orte_rmaps_base_map_t); maps[i] = OBJ_NEW(orte_job_map_t);
if (NULL == maps[i]) { if (NULL == maps[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* unpack the app_context */ /* unpack the jobid */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->app), &n, ORTE_APP_CONTEXT))) { &(maps[i]->job), &n, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* unpack the number of procs */ /* unpack the number of app_contexts */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->num_procs), &n, ORTE_STD_CNTR))) { &(maps[i]->num_apps), &n, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* if we have some, allocate space for them */ /* allocate space for them */
if (0 < maps[i]->num_procs) { maps[i]->apps = (orte_app_context_t**)malloc(maps[i]->num_apps * sizeof(orte_app_context_t*));
maps[i]->procs = (orte_rmaps_base_proc_t**)malloc(maps[i]->num_procs * sizeof(orte_rmaps_base_proc_t*)); if (NULL == maps[i]->apps) {
if (NULL == maps[i]->procs) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* and unpack them */ /* unpack the app_context */
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, maps[i]->procs, &(maps[i]->num_procs), ORTE_MAPPED_PROC))) { if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->apps), &(maps[i]->num_apps), ORTE_APP_CONTEXT))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
}
/* unpack the number of nodes */ /* unpack the number of nodes */
n = 1; n = 1;
@ -112,39 +111,23 @@ int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
{ {
int rc; int rc;
orte_std_cntr_t i, n; orte_std_cntr_t i, n;
orte_rmaps_base_proc_t **procs; orte_mapped_proc_t **procs;
/* unpack into array of orte_rmaps_base_proc_t objects */ /* unpack into array of orte_mapped_proc_t objects */
procs = (orte_rmaps_base_proc_t**) dest; procs = (orte_mapped_proc_t**) dest;
for (i=0; i < *num_vals; i++) { for (i=0; i < *num_vals; i++) {
/* create the orte_rmaps_base_proc_t object */ /* create the orte_mapped_proc_t object */
procs[i] = OBJ_NEW(orte_rmaps_base_proc_t); procs[i] = OBJ_NEW(orte_mapped_proc_t);
if (NULL == procs[i]) { if (NULL == procs[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* unpack the app name */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->app), &n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the proc_node */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->proc_node), &n, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the proc name */ /* unpack the proc name */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->proc_name), &n, ORTE_NAME))) { &(procs[i]->name), &n, ORTE_NAME))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
@ -152,12 +135,12 @@ int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
/* unpack the rank */ /* unpack the rank */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->proc_rank), &n, ORTE_STD_CNTR))) { &(procs[i]->rank), &n, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* unpack the pls-pid */ /* unpack the pid */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->pid), &n, ORTE_PID))) { &(procs[i]->pid), &n, ORTE_PID))) {
@ -165,10 +148,10 @@ int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
return rc; return rc;
} }
/* unpack the local pid */ /* unpack the app_idx */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(procs[i]->local_pid), &n, ORTE_PID))) { &(procs[i]->app_idx), &n, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
@ -185,24 +168,56 @@ int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
{ {
int rc; int rc;
orte_std_cntr_t i, j, n, num_procs; orte_std_cntr_t i, j, n, num_procs;
orte_rmaps_base_node_t **nodes; orte_mapped_node_t **nodes;
orte_rmaps_base_proc_t *srcproc; orte_mapped_proc_t *srcproc;
/* unpack into array of orte_rmaps_base_node_t objects */ /* unpack into array of orte_mapped_node_t objects */
nodes = (orte_rmaps_base_node_t**) dest; nodes = (orte_mapped_node_t**) dest;
for (i=0; i < *num_vals; i++) { for (i=0; i < *num_vals; i++) {
/* create the orte_rmaps_base_node_t object */ /* create the orte_rmaps_base_node_t object */
nodes[i] = OBJ_NEW(orte_rmaps_base_node_t); nodes[i] = OBJ_NEW(orte_mapped_node_t);
if (NULL == nodes[i]) { if (NULL == nodes[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* unpack the node object */ /* unpack the cellid */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->node), &n, ORTE_RAS_NODE))) { &(nodes[i]->cell), &n, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the nodename */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->nodename), &n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the username */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->username), &n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the daemon's name */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->daemon), &n, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the oversubscribed flag */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(nodes[i]->oversubscribed), &n, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
@ -222,7 +237,7 @@ int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
opal_list_append(&(nodes[i]->node_procs), &srcproc->super); opal_list_append(&(nodes[i]->procs), &srcproc->super);
} }
} }
} }

Просмотреть файл

@ -1,903 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/util/output.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/schema/schema.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/smr/smr_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"
/**
* orte_rmaps_base_node_t
*/
static void orte_rmaps_base_node_construct(orte_rmaps_base_node_t* node)
{
node->node = NULL;
OBJ_CONSTRUCT(&node->node_procs, opal_list_t);
}
static void orte_rmaps_base_node_destruct(orte_rmaps_base_node_t* node)
{
opal_list_item_t* item;
if(NULL != node->node) {
OBJ_RELEASE(node->node);
node->node = NULL;
}
while(NULL != (item = opal_list_remove_first(&node->node_procs))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&node->node_procs);
}
OBJ_CLASS_INSTANCE(
orte_rmaps_base_node_t,
opal_list_item_t,
orte_rmaps_base_node_construct,
orte_rmaps_base_node_destruct);
/**
* orte_rmaps_base_proc_t
*/
static void orte_rmaps_base_proc_construct(orte_rmaps_base_proc_t* proc)
{
proc->app = NULL;
proc->proc_node = NULL;
proc->pid = 0;
proc->local_pid = 0;
}
static void orte_rmaps_base_proc_destruct(orte_rmaps_base_proc_t* proc)
{
if (NULL != proc->app) {
free(proc->app);
proc->app = NULL;
}
}
OBJ_CLASS_INSTANCE(
orte_rmaps_base_proc_t,
opal_list_item_t,
orte_rmaps_base_proc_construct,
orte_rmaps_base_proc_destruct);
/**
* orte_rmaps_base_map_t
*/
static void orte_rmaps_base_map_construct(orte_rmaps_base_map_t* map)
{
map->app = NULL;
map->procs = NULL;
map->num_procs = 0;
OBJ_CONSTRUCT(&map->nodes, opal_list_t);
}
static void orte_rmaps_base_map_destruct(orte_rmaps_base_map_t* map)
{
orte_std_cntr_t i=0;
opal_list_item_t* item;
for(i=0; i<map->num_procs; i++) {
OBJ_RELEASE(map->procs[i]);
}
while(NULL != (item = opal_list_remove_first(&map->nodes)))
OBJ_RELEASE(item);
if(NULL != map->procs) {
free(map->procs);
map->procs = NULL;
}
if(NULL != map->app) {
OBJ_RELEASE(map->app);
map->app = NULL;
}
OBJ_DESTRUCT(&map->nodes);
}
OBJ_CLASS_INSTANCE(
orte_rmaps_base_map_t,
opal_list_item_t,
orte_rmaps_base_map_construct,
orte_rmaps_base_map_destruct);
/*
* Compare two proc entries
*/
static int orte_rmaps_value_compare(orte_gpr_value_t** val1, orte_gpr_value_t** val2)
{
orte_std_cntr_t i;
orte_std_cntr_t app1 = 0;
orte_std_cntr_t app2 = 0;
orte_std_cntr_t rank1 = 0;
orte_std_cntr_t rank2 = 0;
orte_std_cntr_t *sptr;
orte_gpr_value_t* value;
int rc;
for(i=0, value=*val1; i<value->cnt; i++) {
orte_gpr_keyval_t* keyval = value->keyvals[i];
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
}
rank1 = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
}
app1 = *sptr;
continue;
}
}
for(i=0, value=*val2; i<value->cnt; i++) {
orte_gpr_keyval_t* keyval = value->keyvals[i];
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
}
rank2 = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
}
app2 = *sptr;
continue;
}
}
if(app1 < app2)
return -1;
if(app1 > app2)
return +1;
if(rank1 < rank2)
return -1;
if(rank1 > rank2)
return +1;
return 0;
}
/**
* Obtain the mapping for this job, and the list of nodes confined to that mapping.
*
* Use this instead of orte_ras_base_node_query when past the RMAPS framework
* since components like the PLS are only conserned with those nodes that they
* been mapped on, not all of the nodes allocated to their job. In the case
* where we are allocated 10 nodes from the RAS, but only map to 2 of them
* then we don't try to launch orteds on all 10 nodes, just the 2 mapped.
*/
int orte_rmaps_base_mapped_node_query(opal_list_t* mapping_list, opal_list_t* nodes_alloc, orte_jobid_t jobid)
{
opal_list_item_t *item_a, *item_m, *item_n;
int num_mapping = 0;
int rc = ORTE_SUCCESS;
bool matched = false;
/* get the mapping for this job */
rc = orte_rmaps_base_get_map(jobid, mapping_list);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
num_mapping = opal_list_get_size(mapping_list);
/* Create a list of nodes that are in the mapping */
for( item_m = opal_list_get_first(mapping_list);
item_m != opal_list_get_end(mapping_list);
item_m = opal_list_get_next(item_m)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item_m;
/* Iterate over all the nodes mapped and check them against the
* allocated node list */
for( item_n = opal_list_get_first(&(map->nodes));
item_n != opal_list_get_end(&(map->nodes));
item_n = opal_list_get_next(item_n)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)item_n;
matched = false;
/* If this node is in the list already, skip it */
if(num_mapping > 1) {
for( item_a = opal_list_get_first(nodes_alloc);
item_a != opal_list_get_end(nodes_alloc);
item_a = opal_list_get_next(item_a)) {
orte_ras_node_t* ras_node = (orte_ras_node_t*)item_a;
if( rmaps_node->node == ras_node) {
matched = true;
break;
}
}
if(matched) {
continue;
}
}
/* Otherwise
* - Add it to the allocated list of nodes
*/
OBJ_RETAIN(rmaps_node->node);
opal_list_append(nodes_alloc, &rmaps_node->node->super);
}
}
return rc;
}
/**
* Lookup node (if it exists) in the list. If it doesn't exist, create a new
* node and append to the table.
*/
static orte_rmaps_base_node_t*
orte_rmaps_lookup_node(opal_list_t* rmaps_nodes, opal_list_t* ras_nodes, char* node_name, orte_rmaps_base_proc_t* proc)
{
opal_list_item_t* item;
for(item = opal_list_get_first(rmaps_nodes);
item != opal_list_get_end(rmaps_nodes);
item = opal_list_get_next(item)) {
orte_rmaps_base_node_t* node = (orte_rmaps_base_node_t*)item;
if(strcmp(node->node->node_name, node_name) == 0) {
OBJ_RETAIN(proc);
opal_list_append(&node->node_procs, &proc->super);
return node;
}
}
for(item = opal_list_get_first(ras_nodes);
item != opal_list_get_end(ras_nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* ras_node = (orte_ras_node_t*)item;
if(strcmp(ras_node->node_name, node_name) == 0) {
orte_rmaps_base_node_t* node = OBJ_NEW(orte_rmaps_base_node_t);
OBJ_RETAIN(ras_node);
node->node = ras_node;
OBJ_RETAIN(proc);
opal_list_append(&node->node_procs, &proc->super);
opal_list_prepend(rmaps_nodes, &node->super);
return node;
}
}
return NULL;
}
/**
* Query the process mapping from the registry.
*/
int orte_rmaps_base_get_map(orte_jobid_t jobid, opal_list_t* mapping_list)
{
orte_app_context_t** app_context = NULL;
orte_rmaps_base_map_t** mapping = NULL;
opal_list_t nodes;
opal_list_item_t* item;
orte_std_cntr_t i, num_context = 0;
orte_std_cntr_t *sptr;
orte_process_name_t *pptr;
pid_t *pidptr;
char* segment = NULL;
orte_gpr_value_t** values;
orte_std_cntr_t v, num_values;
int rc;
char* keys[] = {
ORTE_PROC_RANK_KEY,
ORTE_PROC_NAME_KEY,
ORTE_PROC_APP_CONTEXT_KEY,
ORTE_PROC_PID_KEY,
ORTE_PROC_LOCAL_PID_KEY,
ORTE_NODE_NAME_KEY,
NULL
};
/* query the application context */
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &app_context, &num_context))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* query the node list */
OBJ_CONSTRUCT(&nodes, opal_list_t);
if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(&nodes,jobid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* build the mapping */
if(NULL == (mapping = (orte_rmaps_base_map_t**)malloc(sizeof(orte_rmaps_base_map_t*) * num_context))) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
for(i=0; i<num_context; i++) {
orte_rmaps_base_map_t* map = OBJ_NEW(orte_rmaps_base_map_t);
orte_app_context_t* app = app_context[i];
map->app = app;
if (0 < app->num_procs) {
map->procs = (orte_rmaps_base_proc_t**)malloc(sizeof(orte_rmaps_base_proc_t*) * app->num_procs);
if(NULL == map->procs) {
OBJ_RELEASE(map);
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
} else {
if (1 < num_context) { /** can't have multiple contexts if zero num_procs */
ORTE_ERROR_LOG(ORTE_ERR_INVALID_NUM_PROCS);
rc = ORTE_ERR_INVALID_NUM_PROCS;
goto cleanup;
}
}
map->num_procs = 0;
mapping[i] = map;
}
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* query the process list from the registry */
rc = orte_gpr.get(
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
segment,
NULL,
keys,
&num_values,
&values);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* sort the response */
qsort(values, num_values, sizeof(orte_gpr_value_t*),
(int (*)(const void*,const void*))orte_rmaps_value_compare);
/* build the proc list */
for(v=0; v<num_values; v++) {
orte_gpr_value_t* value = values[v];
orte_rmaps_base_map_t* map = NULL;
orte_rmaps_base_proc_t* proc;
char* node_name = NULL;
orte_std_cntr_t kv, app_index;
proc = OBJ_NEW(orte_rmaps_base_proc_t);
if(NULL == proc) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
for(kv = 0; kv<value->cnt; kv++) {
orte_gpr_keyval_t* keyval = value->keyvals[kv];
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->proc_rank = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->proc_name = *pptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
app_index = *sptr;
if(app_index >= num_context) {
rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
map = mapping[app_index];
proc->app = strdup(app_context[app_index]->app);
continue;
}
if (strcmp(keyval->key, ORTE_PROC_PID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->pid = *pidptr;
continue;
}
if (strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->local_pid = *pidptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
continue;
}
}
/* global record */
if(NULL == map) {
OBJ_RELEASE(proc);
continue;
}
/*
* This seems like a dummy check, but it ensures that we fail
* rather than overrun our array. This can happen if the
* indicies on the app schemas are incorrect
*/
if(map->num_procs < map->app->num_procs) {
map->procs[map->num_procs++] = proc;
proc->proc_node = orte_rmaps_lookup_node(&map->nodes, &nodes, node_name, proc);
}
else {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup;
}
}
/* cleanup any nodes allocated and not mapped */
while(NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
/* release temporary variables */
for(i=0; i<num_context; i++) {
opal_list_append(mapping_list, &mapping[i]->super);
}
free(segment);
free(app_context);
free(mapping);
return ORTE_SUCCESS;
cleanup:
if(NULL != segment)
free(segment);
if(NULL != app_context) {
for(i=0; i<num_context; i++) {
OBJ_RELEASE(app_context[i]);
}
free(app_context);
}
if(NULL != mapping) {
for(i=0; i<num_context; i++) {
if(NULL != mapping[i])
OBJ_RELEASE(mapping[i]);
}
free(mapping);
}
/* cleanup any nodes allocated and not mapped */
while(NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
return rc;
}
/**
* Query the process mapping for a specified node from the registry.
*/
int orte_rmaps_base_get_node_map(
orte_cellid_t cellid,
orte_jobid_t jobid,
const char* hostname,
opal_list_t* mapping_list)
{
orte_app_context_t** app_context = NULL;
orte_rmaps_base_map_t** mapping = NULL;
orte_ras_node_t *ras_node = NULL;
orte_gpr_keyval_t *condition;
orte_std_cntr_t i, num_context = 0;
orte_std_cntr_t *sptr;
pid_t *pidptr;
orte_process_name_t *pptr;
char* segment = NULL;
orte_gpr_value_t** values;
orte_std_cntr_t v, num_values;
int rc;
char* keys[] = {
ORTE_PROC_RANK_KEY,
ORTE_PROC_NAME_KEY,
ORTE_PROC_APP_CONTEXT_KEY,
ORTE_PROC_PID_KEY,
ORTE_PROC_LOCAL_PID_KEY,
ORTE_NODE_NAME_KEY,
NULL
};
/* allocate the node */
if(NULL == (ras_node = orte_ras.node_lookup(cellid,hostname))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
/* query the application context */
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &app_context, &num_context))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(NULL == (mapping = (orte_rmaps_base_map_t**)malloc(sizeof(orte_rmaps_base_map_t*) * num_context))) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
for(i=0; i<num_context; i++) {
orte_rmaps_base_map_t* map = OBJ_NEW(orte_rmaps_base_map_t);
orte_app_context_t* app = app_context[i];
OBJ_RETAIN(app);
map->app = app;
map->procs = (orte_rmaps_base_proc_t**)malloc(sizeof(orte_rmaps_base_proc_t*) * app->num_procs);
if(NULL == map->procs) {
OBJ_RELEASE(map);
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
map->num_procs = 0;
mapping[i] = map;
}
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* setup condition/filter for query - return only processes that
* are assigned to the specified node name
*/
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&condition, ORTE_NODE_NAME_KEY, ORTE_STRING, (void*)hostname))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* query the process list from the registry */
rc = orte_gpr.get_conditional(
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
segment,
NULL,
keys,
1,
&condition,
&num_values,
&values);
/* sort the response */
qsort(values, num_values, sizeof(orte_gpr_value_t*),
(int (*)(const void*,const void*))orte_rmaps_value_compare);
/* build the proc list */
for(v=0; v<num_values; v++) {
orte_gpr_value_t* value = values[v];
orte_rmaps_base_map_t* map = NULL;
orte_rmaps_base_node_t *node = NULL;
orte_rmaps_base_proc_t* proc;
char* node_name = NULL;
orte_std_cntr_t kv, app_index;
proc = OBJ_NEW(orte_rmaps_base_proc_t);
if(NULL == proc) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
for(kv = 0; kv<value->cnt; kv++) {
orte_gpr_keyval_t* keyval = value->keyvals[kv];
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->proc_rank = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->proc_name = *pptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
app_index = *sptr;
if(app_index >= num_context) {
rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
map = mapping[app_index];
if(opal_list_get_size(&map->nodes) == 0) {
node = OBJ_NEW(orte_rmaps_base_node_t);
if(NULL == node) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto cleanup;
}
OBJ_RETAIN(ras_node);
node->node = ras_node;
opal_list_append(&map->nodes, &node->super);
} else {
node = (orte_rmaps_base_node_t*)opal_list_get_first(&map->nodes);
}
proc->app = strdup(app_context[app_index]->app);
continue;
}
if (strcmp(keyval->key, ORTE_PROC_PID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->pid = *pidptr;
continue;
}
if (strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->local_pid = *pidptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
continue;
}
}
/* skip this entry? */
if(NULL == map ||
proc->proc_name.cellid != cellid) {
OBJ_RELEASE(proc);
continue;
}
map->procs[map->num_procs++] = proc;
OBJ_RETAIN(proc);
opal_list_append(&node->node_procs, &proc->super);
proc->proc_node = node;
}
/* return mapping for the entries that have procs on this node */
for(i=0; i<num_context; i++) {
orte_rmaps_base_map_t* map = mapping[i];
if(map->num_procs) {
opal_list_append(mapping_list, &map->super);
} else {
OBJ_RELEASE(map);
}
}
/* decrement reference count on node */
OBJ_RELEASE(ras_node);
/* release all app context - note the reference count was bumped
* if saved in the map
*/
for(i=0; i<num_context; i++) {
OBJ_RELEASE(app_context[i]);
}
free(segment);
free(app_context);
free(mapping);
OBJ_RELEASE(condition);
return ORTE_SUCCESS;
cleanup:
if(NULL != segment)
free(segment);
if(NULL != app_context) {
for(i=0; i<num_context; i++) {
OBJ_RELEASE(app_context[i]);
}
free(app_context);
}
if(NULL != mapping) {
for(i=0; i<num_context; i++) {
if(NULL != mapping[i])
OBJ_RELEASE(mapping[i]);
}
free(mapping);
}
if (NULL != condition)
OBJ_RELEASE(condition);
return rc;
}
/**
* Set the process mapping in the registry.
*/
int orte_rmaps_base_set_map(orte_jobid_t jobid, opal_list_t* mapping_list)
{
orte_std_cntr_t i, j;
orte_std_cntr_t index=0;
orte_std_cntr_t num_procs = 0;
int rc = ORTE_SUCCESS;
opal_list_item_t* item;
orte_gpr_value_t** values;
char *segment;
for(item = opal_list_get_first(mapping_list);
item != opal_list_get_end(mapping_list);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item;
num_procs += map->num_procs;
}
if(num_procs == 0) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
/**
* allocate value array. We need to reserve one extra spot so we can set the counter
* for the process INIT state to indicate that all procs are at that state. This will
* allow the INIT trigger to fire.
*/
values = (orte_gpr_value_t**)malloc((1+num_procs) * sizeof(orte_gpr_value_t*));
if(NULL == values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment,jobid))) {
ORTE_ERROR_LOG(rc);
free(values);
return rc;
}
/** setup the last value in the array to update the INIT counter */
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[num_procs]),
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
segment, 1, 1))) {
ORTE_ERROR_LOG(rc);
free(values);
free(segment);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[num_procs]->keyvals[0]), ORTE_PROC_NUM_AT_INIT, ORTE_STD_CNTR, &num_procs))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
values[num_procs]->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* counter is in the job's globals container */
for(i=0; i<num_procs; i++) {
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
segment, 7, 0))) {
ORTE_ERROR_LOG(rc);
for(j=0; j<i; j++) {
OBJ_RELEASE(values[j]);
}
free(values);
free(segment);
return rc;
}
}
/* iterate through all processes and initialize value array */
for(item = opal_list_get_first(mapping_list);
item != opal_list_get_end(mapping_list);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item;
orte_std_cntr_t p;
for(p=0; p<map->num_procs; p++) {
orte_rmaps_base_proc_t* proc = map->procs[p];
orte_gpr_value_t* value = values[index++];
orte_proc_state_t proc_state=ORTE_PROC_STATE_INIT;
/* initialize keyvals */
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_PROC_RANK_KEY, ORTE_STD_CNTR, &(proc->proc_rank)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_PROC_NAME_KEY, ORTE_NAME, &(proc->proc_name)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_NODE_NAME_KEY, ORTE_STRING, proc->proc_node->node->node_name))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &(map->app->idx)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[5]), ORTE_PROC_PID_KEY, ORTE_PID, &(proc->pid)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[6]), ORTE_PROC_LOCAL_PID_KEY, ORTE_PID, &(proc->local_pid)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* set the tokens */
if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(value->tokens), &(value->num_tokens), &(proc->proc_name)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
}
}
/* insert all values in one call */
if (ORTE_SUCCESS != (rc = orte_gpr.put((1+num_procs), values))) {
ORTE_ERROR_LOG(rc);
}
cleanup:
for(i=0; i<num_procs; i++) {
if(NULL != values[i]) {
OBJ_RELEASE(values[i]);
}
}
if(NULL != values)
free(values);
return rc;
}

Просмотреть файл

@ -42,7 +42,7 @@ static orte_rmaps_base_module_t *select_any(void);
* Function for selecting one component from all those that are * Function for selecting one component from all those that are
* available. * available.
*/ */
int orte_rmaps_base_map(orte_jobid_t job, char *desired_mapper) int orte_rmaps_base_map_job(orte_jobid_t job, char *desired_mapper)
{ {
orte_rmaps_base_module_t *module=NULL; orte_rmaps_base_module_t *module=NULL;
int rc; int rc;

Просмотреть файл

@ -1,35 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/class/opal_list.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
int orte_rmaps_base_map_no_op(orte_jobid_t job, char *desired_mapper)
{
return ORTE_ERR_NOT_SUPPORTED;
}

Просмотреть файл

@ -50,14 +50,17 @@ orte_rmaps_base_t orte_rmaps_base;
* Declare the RMAPS module to hold the API function pointers * Declare the RMAPS module to hold the API function pointers
*/ */
orte_rmaps_base_module_t orte_rmaps = { orte_rmaps_base_module_t orte_rmaps = {
orte_rmaps_base_map, orte_rmaps_base_map_job,
orte_rmaps_base_get_job_map,
orte_rmaps_base_get_node_map,
orte_rmaps_base_finalize orte_rmaps_base_finalize
}; };
orte_rmaps_base_module_t orte_rmaps_no_op = { /*
orte_rmaps_base_map_no_op, * Include all the RMAPS class instance declarations
orte_rmaps_base_finalize */
}; #include "orte/mca/rmaps/base/rmaps_class_instances.h"
/** /**
* Function for finding and opening either all MCA components, or the one * Function for finding and opening either all MCA components, or the one
@ -66,7 +69,7 @@ orte_rmaps_base_module_t orte_rmaps_no_op = {
int orte_rmaps_base_open(void) int orte_rmaps_base_open(void)
{ {
int param, rc, value; int param, rc, value;
char *policy, *requested; char *policy;
orte_data_type_t tmp; orte_data_type_t tmp;
/* Debugging / verbose output */ /* Debugging / verbose output */
@ -150,30 +153,7 @@ int orte_rmaps_base_open(void)
} }
/* Some systems do not want any RMAPS support. In those cases,
* memory consumption is also an issue. For those systems, we
* avoid opening the RMAPS components by checking for a directive
* to use the "null" component.
*/
param = mca_base_param_reg_string_name("rmaps", NULL, NULL,
false, false, NULL, NULL);
if (ORTE_ERROR == mca_base_param_lookup_string(param, &requested)) {
return ORTE_ERROR;
}
if (NULL != requested && 0 == strcmp(requested, "null")) {
/* the user has specifically requested that we use the "null"
* component. In this case, that means we do NOT open any
* components, and we simply use the default module we have
* already defined above
*/
orte_rmaps_base.no_op_selected = true;
orte_rmaps = orte_rmaps_no_op; /* use the no_op module */
return ORTE_SUCCESS;
}
orte_rmaps_base.no_op_selected = false;
/* Open up all the components that we can find */ /* Open up all the components that we can find */
if (ORTE_SUCCESS != if (ORTE_SUCCESS !=
mca_base_components_open("rmaps", orte_rmaps_base.rmaps_output, mca_base_components_open("rmaps", orte_rmaps_base.rmaps_output,
mca_rmaps_base_static_components, mca_rmaps_base_static_components,

413
orte/mca/rmaps/base/rmaps_base_registry_fns.c Обычный файл
Просмотреть файл

@ -0,0 +1,413 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/util/output.h"
#include "opal/util/trace.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/schema/schema.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/smr/smr_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"
/*
* Query the process mapping from the registry.
*/
int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
{
orte_job_map_t *mapping;
orte_mapped_proc_t *proc;
orte_cellid_t *cellptr, cell;
orte_std_cntr_t *sptr;
bool *bptr, oversub;
pid_t *pidptr;
orte_process_name_t *pptr;
char *segment;
char *node_name;
char *username;
orte_gpr_value_t **values, *value;
orte_gpr_keyval_t* keyval;
orte_std_cntr_t v, kv, num_values;
int rc;
char* keys[] = {
ORTE_PROC_RANK_KEY,
ORTE_PROC_NAME_KEY,
ORTE_PROC_APP_CONTEXT_KEY,
ORTE_PROC_LOCAL_PID_KEY,
ORTE_CELLID_KEY,
ORTE_NODE_NAME_KEY,
ORTE_NODE_USERNAME_KEY,
ORTE_NODE_OVERSUBSCRIBED_KEY,
NULL
};
OPAL_TRACE(1);
/* define default answer */
*map = NULL;
/* create the object */
mapping = OBJ_NEW(orte_job_map_t);
if (NULL == mapping) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* store the jobid */
mapping->job = jobid;
/* get the job segment name */
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(mapping);
return rc;
}
/* query the application context */
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &(mapping->apps), &(mapping->num_apps)))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* query the process list from the registry */
rc = orte_gpr.get(
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
segment,
NULL,
keys,
&num_values,
&values);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(mapping);
free(segment);
return rc;
}
free(segment);
/* build the node and proc lists. each value corresponds
* to a process in the map
*/
for(v=0; v<num_values; v++) {
value = values[v];
node_name = NULL;
proc = OBJ_NEW(orte_mapped_proc_t);
if(NULL == proc) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
for(kv = 0; kv<value->cnt; kv++) {
keyval = value->keyvals[kv];
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->rank = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->name = *pptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->app_idx = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->pid = *pidptr;
continue;
}
if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cellptr, keyval->value, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
cell = *cellptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_USERNAME_KEY) == 0) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&username, keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_OVERSUBSCRIBED_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, keyval->value, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
oversub = *bptr;
continue;
}
}
/* store this process in the map */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(mapping, cell, node_name, username, oversub, proc))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (NULL != node_name) free(node_name);
}
/* all done */
*map = mapping;
return ORTE_SUCCESS;
cleanup:
OBJ_RELEASE(mapping);
for (v=0; v < num_values; v++) {
OBJ_RELEASE(values[v]);
}
if (NULL != values) free(values);
return rc;
}
/*
 * Retrieve the map entry for one specific node of a job.
 *
 * Rebuilds the job map from the registry, searches it for the node
 * identified by (cell, nodename), and hands that node object back to
 * the caller. The caller owns the returned node and must release it.
 * Returns ORTE_ERR_NOT_FOUND if the node is not in the job's map.
 */
int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell,
                                 char *nodename, orte_jobid_t job)
{
    orte_job_map_t *jmap;
    orte_mapped_node_t *candidate;
    opal_list_item_t *itm;
    int rc;

    /* default answer in case of failure */
    *node = NULL;

    /* reconstruct the full job map from the registry */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_job_map(&jmap, job))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* walk the map looking for the requested node */
    itm = opal_list_get_first(&jmap->nodes);
    while (itm != opal_list_get_end(&jmap->nodes)) {
        candidate = (orte_mapped_node_t*)itm;
        if (candidate->cell == cell && 0 == strcmp(candidate->nodename, nodename)) {
            /* pull the node off the map's list so that releasing the
             * map object below does not also destroy the node we are
             * about to return
             */
            opal_list_remove_item(&jmap->nodes, itm);
            *node = candidate;
            OBJ_RELEASE(jmap);
            return ORTE_SUCCESS;
        }
        itm = opal_list_get_next(itm);
    }

    /* requested node is not part of this job's map */
    OBJ_RELEASE(jmap);
    return ORTE_ERR_NOT_FOUND;
}
/**
* Set the process mapping in the registry.
*/
/**
 * Set the process mapping in the registry.
 *
 * Enters one registry value per mapped process on the job's segment,
 * plus one extra value (at index num_procs) that sets the INIT counter
 * in the job's globals container so the INIT trigger can fire.
 *
 * @param map  the fully-populated job map to store
 * @return ORTE_SUCCESS, or an ORTE error code on failure
 *
 * Fixes over the prior version:
 *  - values[] is now calloc'ed: an early "goto cleanup" used to run
 *    OBJ_RELEASE on uninitialized pointers (undefined behavior)
 *  - the INIT-counter value at values[num_procs] is now released
 *  - the segment string is now freed on every exit path (was leaked)
 */
int orte_rmaps_base_put_job_map(orte_job_map_t *map)
{
    orte_std_cntr_t i;
    orte_std_cntr_t index=0;
    orte_std_cntr_t num_procs = 0;
    int rc = ORTE_SUCCESS;
    opal_list_item_t *item, *item2;
    orte_gpr_value_t **values, *value;
    char *segment;
    orte_mapped_node_t *node;
    orte_mapped_proc_t *proc;
    orte_proc_state_t proc_state=ORTE_PROC_STATE_INIT;

    OPAL_TRACE(2);

    /* count the total number of procs across all nodes in the map */
    for(item = opal_list_get_first(&map->nodes);
        item != opal_list_get_end(&map->nodes);
        item = opal_list_get_next(item)) {
        node = (orte_mapped_node_t*)item;
        num_procs += opal_list_get_size(&node->procs);
    }
    if(num_procs == 0) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /**
     * allocate value array. We need to reserve one extra spot so we can set the counter
     * for the process INIT state to indicate that all procs are at that state. This will
     * allow the INIT trigger to fire.
     *
     * Use calloc so every slot starts NULL - the cleanup path tests each
     * slot before releasing it, which is only safe if unset slots are NULL.
     */
    values = (orte_gpr_value_t**)calloc(1+num_procs, sizeof(orte_gpr_value_t*));
    if(NULL == values) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, map->job))) {
        ORTE_ERROR_LOG(rc);
        free(values);
        return rc;
    }

    /** setup the last value in the array to update the INIT counter */
    if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[num_procs]),
                                                    ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
                                                    segment, 1, 1))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[num_procs]->keyvals[0]), ORTE_PROC_NUM_AT_INIT, ORTE_STD_CNTR, &num_procs))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    values[num_procs]->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* counter is in the job's globals container */

    /* create one registry value per process, each with 8 keyvals */
    for(i=0; i<num_procs; i++) {
        if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
                                                        ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
                                                        segment, 8, 0))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    }

    /* iterate through all processes and initialize value array */
    for(item = opal_list_get_first(&map->nodes);
        item != opal_list_get_end(&map->nodes);
        item = opal_list_get_next(item)) {
        node = (orte_mapped_node_t*)item;

        for (item2 = opal_list_get_first(&node->procs);
             item2 != opal_list_get_end(&node->procs);
             item2 = opal_list_get_next(item2)) {
            proc = (orte_mapped_proc_t*)item2;
            value = values[index++];

            /* initialize keyvals */
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_PROC_RANK_KEY, ORTE_STD_CNTR, &(proc->rank)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_PROC_NAME_KEY, ORTE_NAME, &(proc->name)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_CELLID_KEY, ORTE_CELLID, &(node->cell)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), ORTE_NODE_NAME_KEY, ORTE_STRING, node->nodename))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), ORTE_NODE_USERNAME_KEY, ORTE_STRING, node->username))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[5]), ORTE_NODE_OVERSUBSCRIBED_KEY, ORTE_BOOL, &(node->oversubscribed)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[6]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &(proc->app_idx)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[7]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }

            /* set the tokens so the keyvals land in this proc's container */
            if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(value->tokens), &(value->num_tokens), &(proc->name)))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
        }
    }

    /* insert all values in one call */
    if (ORTE_SUCCESS != (rc = orte_gpr.put((1+num_procs), values))) {
        ORTE_ERROR_LOG(rc);
    }

cleanup:
    /* release ALL entries, including the INIT-counter value at [num_procs] */
    for(i=0; i < 1+num_procs; i++) {
        if(NULL != values[i]) {
            OBJ_RELEASE(values[i]);
        }
    }
    free(values);
    free(segment);
    return rc;
}

Просмотреть файл

@ -48,6 +48,7 @@ static bool are_all_mapped_valid(char **mapping,
opal_list_t* nodes) opal_list_t* nodes)
{ {
opal_list_item_t *item; opal_list_item_t *item;
orte_ras_node_t *node;
int i; int i;
bool matched; bool matched;
@ -57,7 +58,8 @@ static bool are_all_mapped_valid(char **mapping,
for(item = opal_list_get_first(nodes); for(item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes); item != opal_list_get_end(nodes);
item = opal_list_get_next(item) ) { item = opal_list_get_next(item) ) {
if( 0 == strcmp( ((orte_ras_node_t*) item)->node_name, mapping[i]) ) { node = (orte_ras_node_t*) item;
if( 0 == strcmp(node->node_name, mapping[i]) ) {
matched = true; matched = true;
break; break;
} }
@ -94,7 +96,7 @@ static bool is_mapped(opal_list_item_t *item,
/* /*
* Query the registry for all nodes allocated to a specified job * Query the registry for all nodes allocated to a specified job
*/ */
int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots) int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots)
{ {
opal_list_item_t *item, *next; opal_list_item_t *item, *next;
orte_ras_node_t *node; orte_ras_node_t *node;
@ -104,7 +106,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
/** set default answer */ /** set default answer */
*total_num_slots = 0; *total_num_slots = 0;
if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(nodes, jobid))) { /* get the allocation for this job */
if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(allocated_nodes, jobid))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
@ -115,21 +118,21 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
id = mca_base_param_find("rmaps", NULL, "base_schedule_local"); id = mca_base_param_find("rmaps", NULL, "base_schedule_local");
mca_base_param_lookup_int(id, &nolocal); mca_base_param_lookup_int(id, &nolocal);
if (0 == nolocal) { if (0 == nolocal) {
for (item = opal_list_get_first(nodes); for (item = opal_list_get_first(allocated_nodes);
item != opal_list_get_end(nodes); item != opal_list_get_end(allocated_nodes);
item = opal_list_get_next(item) ) { item = opal_list_get_next(item) ) {
if (0 == strcmp(((orte_ras_node_t *) item)->node_name, node = (orte_ras_node_t*)item;
orte_system_info.nodename) || if (0 == strcmp(node->node_name, orte_system_info.nodename) ||
opal_ifislocal(((orte_ras_node_t *) item)->node_name)) { opal_ifislocal(node->node_name)) {
opal_list_remove_item(nodes, item); opal_list_remove_item(allocated_nodes, item);
break; break;
} }
} }
} }
/** remove all nodes that are already at max usage */ /** remove all nodes that are already at max usage */
item = opal_list_get_first(nodes); item = opal_list_get_first(allocated_nodes);
while (item != opal_list_get_end(nodes)) { while (item != opal_list_get_end(allocated_nodes)) {
/** save the next pointer in case we remove this node */ /** save the next pointer in case we remove this node */
next = opal_list_get_next(item); next = opal_list_get_next(item);
@ -137,8 +140,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
/** check to see if this node is fully used - remove if so */ /** check to see if this node is fully used - remove if so */
node = (orte_ras_node_t*)item; node = (orte_ras_node_t*)item;
if (0 != node->node_slots_max && node->node_slots_inuse > node->node_slots_max) { if (0 != node->node_slots_max && node->node_slots_inuse > node->node_slots_max) {
opal_list_remove_item(nodes, item); opal_list_remove_item(allocated_nodes, item);
} else { /** otherwise, add its slots to the total */ } else { /** otherwise, add the slots for our job to the total */
num_slots += node->node_slots; num_slots += node->node_slots;
} }
@ -146,8 +149,8 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort
item = next; item = next;
} }
/* Sanity check to make sure we have been allocated nodes */ /* Sanity check to make sure we have resources available */
if (0 == opal_list_get_size(nodes)) { if (0 == opal_list_get_size(allocated_nodes)) {
ORTE_ERROR_LOG(ORTE_ERR_TEMP_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_TEMP_OUT_OF_RESOURCE);
return ORTE_ERR_TEMP_OUT_OF_RESOURCE; return ORTE_ERR_TEMP_OUT_OF_RESOURCE;
} }
@ -245,67 +248,108 @@ int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
} }
/* int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename,
* Claim a slot for a specified job on a node char *username, bool oversubscribed, orte_mapped_proc_t *proc)
*/
int orte_rmaps_base_claim_slot(orte_rmaps_base_map_t *map,
orte_ras_node_t *current_node,
orte_jobid_t jobid, orte_vpid_t vpid,
int proc_index,
opal_list_t *nodes,
opal_list_t *fully_used_nodes)
{ {
orte_rmaps_base_proc_t *proc; opal_list_item_t *item;
orte_process_name_t *proc_name; orte_mapped_node_t *node;
orte_rmaps_base_node_t *rmaps_node;
int rc;
/* create objects */ for (item = opal_list_get_first(&map->nodes);
rmaps_node = OBJ_NEW(orte_rmaps_base_node_t); item != opal_list_get_end(&map->nodes);
if (NULL == rmaps_node) { item = opal_list_get_next(item)) {
node = (orte_mapped_node_t*)item;
if (cell == node->cell && 0 == strcmp(nodename, node->nodename)) {
/* node was found - add this proc to that list */
opal_list_append(&node->procs, &proc->super);
/* set the oversubscribed flag */
node->oversubscribed = oversubscribed;
return ORTE_SUCCESS;
}
}
/* node was NOT found - add this one to the list */
node = OBJ_NEW(orte_mapped_node_t);
if (NULL == node) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
OBJ_RETAIN(current_node); node->cell = cell;
rmaps_node->node = current_node; node->nodename = strdup(nodename);
proc = OBJ_NEW(orte_rmaps_base_proc_t); if (NULL != username) {
node->username = strdup(username);
}
node->oversubscribed = oversubscribed;
opal_list_append(&map->nodes, &node->super);
/* and add this proc to the new node's list of procs */
opal_list_append(&node->procs, &proc->super);
return ORTE_SUCCESS;
}
/*
* Claim a slot for a specified job on a node
*/
int orte_rmaps_base_claim_slot(orte_job_map_t *map,
orte_ras_node_t *current_node,
orte_jobid_t jobid, orte_vpid_t vpid,
orte_std_cntr_t app_idx,
opal_list_t *nodes,
opal_list_t *fully_used_nodes)
{
orte_process_name_t *name;
orte_mapped_proc_t *proc;
bool oversub;
int rc;
/* create mapped_proc object */
proc = OBJ_NEW(orte_mapped_proc_t);
if (NULL == proc) { if (NULL == proc) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(rmaps_node);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* create the process name as an offset from the vpid-start */ /* create the process name as an offset from the vpid-start */
rc = orte_ns.create_process_name(&proc_name, current_node->node_cellid, rc = orte_ns.create_process_name(&name, current_node->node_cellid,
jobid, vpid); jobid, vpid);
if (rc != ORTE_SUCCESS) { if (rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(proc); OBJ_RELEASE(proc);
OBJ_RELEASE(rmaps_node);
return rc; return rc;
} }
proc->proc_node = rmaps_node; proc->name = *name;
proc->proc_name = *proc_name; proc->rank = vpid;
proc->proc_rank = vpid; proc->app_idx = app_idx;
orte_ns.free_name(&proc_name);
OBJ_RETAIN(proc); /* bump reference count for the node */
opal_list_append(&rmaps_node->node_procs, &proc->super);
map->procs[proc_index] = proc;
/* Save this node on the map */
opal_list_append(&map->nodes, &rmaps_node->super);
/* Be sure to demarcate this slot as claimed for the node */ /* Be sure to demarcate this slot as claimed for the node */
current_node->node_slots_inuse++; current_node->node_slots_inuse++;
/* see if this node is oversubscribed now */
if (current_node->node_slots_inuse >= current_node->node_slots) {
oversub = true;
} else {
oversub = false;
}
/* add the proc to the map */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(map, current_node->node_cellid,
current_node->node_name,
current_node->node_username,
oversub, proc))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(proc);
return rc;
}
/* Remove this node if it has reached its max number of allocatable slots OR it has /* Remove this node if it has reached its max number of allocatable slots OR it has
* reached the soft limit AND we are in a "no oversubscribe" state * reached the soft limit AND we are in a "no oversubscribe" state
*/ */
if ((0 != current_node->node_slots_max && if ((0 != current_node->node_slots_max &&
current_node->node_slots_inuse >= current_node->node_slots_max) || current_node->node_slots_inuse >= current_node->node_slots_max) ||
(!orte_rmaps_base.oversubscribe && (!orte_rmaps_base.oversubscribe && oversub)) {
current_node->node_slots_inuse >= current_node->node_slots)) {
opal_list_remove_item(nodes, (opal_list_item_t*)current_node); opal_list_remove_item(nodes, (opal_list_item_t*)current_node);
/* add it to the list of fully used nodes */ /* add it to the list of fully used nodes */
opal_list_append(fully_used_nodes, &current_node->super); opal_list_append(fully_used_nodes, &current_node->super);

142
orte/mca/rmaps/base/rmaps_class_instances.h Обычный файл
Просмотреть файл

@ -0,0 +1,142 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*/
#ifndef ORTE_MCA_RMAPS_CLASS_INST_H
#define ORTE_MCA_RMAPS_CLASS_INST_H
/*
* includes
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/rmaps.h"
/*
* Functions for use solely within the RMAPS framework
*/
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
* orte_mapped_proc_t
*/
/* Constructor for orte_mapped_proc_t: mark the process name invalid
 * and zero the rank, app_context index, and pid until they are set
 * by the mapper.
 */
static void orte_rmaps_mapped_proc_construct(orte_mapped_proc_t* proc)
{
    proc->name.cellid = ORTE_CELLID_INVALID;
    proc->name.jobid = ORTE_JOBID_INVALID;
    proc->name.vpid = ORTE_VPID_INVALID;
    proc->rank = 0;
    proc->app_idx = 0;
    proc->pid = 0;
}

/* No destructor needed: the struct owns no heap memory */
OBJ_CLASS_INSTANCE(orte_mapped_proc_t,
                   opal_list_item_t,
                   orte_rmaps_mapped_proc_construct, NULL);
/*
* orte_mapped_node_t
*/
/* Constructor for orte_mapped_node_t: NULL out the owned strings and
 * daemon name, clear the oversubscribed flag, and set up the (owned)
 * list of procs mapped onto this node.
 */
static void orte_rmaps_mapped_node_construct(orte_mapped_node_t* node)
{
    node->nodename = NULL;
    node->username = NULL;
    node->daemon = NULL;
    node->oversubscribed = false;
    OBJ_CONSTRUCT(&node->procs, opal_list_t);
}

/* Destructor: frees the owned strings, the daemon name, and releases
 * every proc object still on the procs list before destructing it.
 */
static void orte_rmaps_mapped_node_destruct(orte_mapped_node_t* node)
{
    opal_list_item_t* item;

    if (NULL != node->nodename) {
        free(node->nodename);
    }

    if (NULL != node->username) {
        free(node->username);
    }

    /* NOTE(review): daemon is an orte_process_name_t*; plain free()
     * assumes it was malloc'ed (e.g. via orte_ns create) - confirm
     * no caller stores a name that must be released differently */
    if (NULL != node->daemon) {
        free(node->daemon);
    }

    while (NULL != (item = opal_list_remove_first(&node->procs))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&node->procs);
}

OBJ_CLASS_INSTANCE(orte_mapped_node_t,
                   opal_list_item_t,
                   orte_rmaps_mapped_node_construct,
                   orte_rmaps_mapped_node_destruct);
/*
* orte_job_map_t
*/
/* Constructor for orte_job_map_t: invalid jobid, empty app_context
 * array, and an empty (owned) list of mapped nodes.
 */
static void orte_rmaps_job_map_construct(orte_job_map_t* map)
{
    map->job = ORTE_JOBID_INVALID;
    map->num_apps = 0;
    map->apps = NULL;
    OBJ_CONSTRUCT(&map->nodes, opal_list_t);
}

/* Destructor: releases each app_context, frees the apps array, and
 * releases every mapped node still on the nodes list.
 */
static void orte_rmaps_job_map_destruct(orte_job_map_t* map)
{
    orte_std_cntr_t i=0;
    opal_list_item_t* item;

    for(i=0; i < map->num_apps; i++) {
        if (NULL != map->apps[i]) OBJ_RELEASE(map->apps[i]);
    }
    if (NULL != map->apps) {
        free(map->apps);
    }

    while (NULL != (item = opal_list_remove_first(&map->nodes))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&map->nodes);
}

/* NOTE(review): parent class here is opal_list_item_t while the struct
 * declaration elsewhere shows an opal_object_t super - confirm which is
 * intended; a map is released with OBJ_RELEASE, not placed on lists. */
OBJ_CLASS_INSTANCE(orte_job_map_t,
                   opal_list_item_t,
                   orte_rmaps_job_map_construct,
                   orte_rmaps_job_map_destruct);
/*
* external API functions will be documented in the mca/rmaps/rmaps.h file
*/
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -30,6 +30,7 @@
#include "orte/mca/ns/ns_types.h" #include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h" #include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/rml/rml_types.h" #include "orte/mca/rml/rml_types.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/rmaps.h" #include "orte/mca/rmaps/rmaps.h"
@ -67,15 +68,47 @@ OBJ_CLASS_DECLARATION(orte_rmaps_base_cmp_t);
/* /*
* Base functions * Base API functions
*/ */
int orte_rmaps_base_map(orte_jobid_t job, char *desired_mapper);
/* /*
* NO_OP functions * Map a job
* All calls to rmaps.map_job are routed through this function. This allows callers to
* the RMAPS framework to specify the particular mapper they wish to use.
*/ */
int orte_rmaps_base_map_no_op(orte_jobid_t job, char *desired_mapper); int orte_rmaps_base_map_job(orte_jobid_t job, char *desired_mapper);
/*
* Get job map
* Retrieve the information for a job map from the registry and reassemble it into
* an job_map object. Memory for the job_map object and all of its elements is
* allocated by the function
*/
ORTE_DECLSPEC int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t job);
/*
* Get node map
* Retrieve the information for a job map from the registry and provide the info
* for the specified node
*/
ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell,
char *nodename, orte_jobid_t job);
/*
* Registry functions for maps
*/
/*
* Put job map
* Given a pointer to an orte_job_map_t, place the map's information on
* the registry. Info is entered into the containers for each individual process on
* the job's segment. Additionally, the function sets the INIT counter to the number
* of processes in the map, thus causing the INIT trigger to fire so that any
* attached subscriptions can be serviced.
*/
ORTE_DECLSPEC int orte_rmaps_base_put_job_map(orte_job_map_t *map);
/* /*
* communication functions * communication functions
@ -89,22 +122,27 @@ void orte_rmaps_base_recv(int status, orte_process_name_t* sender,
/* /*
* Internal support functions * Internal support functions
*/ */
ORTE_DECLSPEC int orte_rmaps_base_mapped_node_query(opal_list_t* mapping_list, opal_list_t* nodes_alloc, orte_jobid_t jobid); /*
ORTE_DECLSPEC int orte_rmaps_base_get_map(orte_jobid_t, opal_list_t* mapping); * Function to add a mapped_proc entry to a map
ORTE_DECLSPEC int orte_rmaps_base_set_map(orte_jobid_t, opal_list_t* mapping); * Scans list of nodes on map to see if the specified one already
ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_cellid_t, orte_jobid_t, const char*, opal_list_t* mapping); * exists - if so, just add this entry to that node's list of
* procs. If not, then add new node entry and put this proc
* on its list.
*/
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename,
char *username, bool oversubscribed, orte_mapped_proc_t *proc);
ORTE_DECLSPEC int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots); int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots);
ORTE_DECLSPEC int orte_rmaps_base_update_node_usage(opal_list_t *nodes); int orte_rmaps_base_update_node_usage(opal_list_t *nodes);
ORTE_DECLSPEC int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list, int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
orte_app_context_t *app, orte_app_context_t *app,
opal_list_t *master_node_list, opal_list_t *master_node_list,
orte_std_cntr_t *total_num_slots); orte_std_cntr_t *total_num_slots);
ORTE_DECLSPEC int orte_rmaps_base_claim_slot(orte_rmaps_base_map_t *map, int orte_rmaps_base_claim_slot(orte_job_map_t *map,
orte_ras_node_t *current_node, orte_ras_node_t *current_node,
orte_jobid_t jobid, orte_vpid_t vpid, orte_jobid_t jobid, orte_vpid_t vpid,
int proc_index, orte_std_cntr_t app_idx,
opal_list_t *nodes, opal_list_t *nodes,
opal_list_t *fully_used_nodes); opal_list_t *fully_used_nodes);
@ -112,32 +150,32 @@ ORTE_DECLSPEC int orte_rmaps_base_claim_slot(orte_rmaps_base_map_t *map,
void orte_rmaps_base_std_obj_release(orte_data_value_t *value); void orte_rmaps_base_std_obj_release(orte_data_value_t *value);
/* JOB_MAP */ /* JOB_MAP */
int orte_rmaps_base_copy_map(orte_rmaps_base_map_t **dest, orte_rmaps_base_map_t *src, orte_data_type_t type); int orte_rmaps_base_copy_map(orte_job_map_t **dest, orte_job_map_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_map(orte_rmaps_base_map_t *value1, orte_rmaps_base_map_t *value2, orte_data_type_t type); int orte_rmaps_base_compare_map(orte_job_map_t *value1, orte_job_map_t *value2, orte_data_type_t type);
int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src, int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type); orte_std_cntr_t num_vals, orte_data_type_t type);
int orte_rmaps_base_print_map(char **output, char *prefix, orte_rmaps_base_map_t *src, orte_data_type_t type); int orte_rmaps_base_print_map(char **output, char *prefix, orte_job_map_t *src, orte_data_type_t type);
int orte_rmaps_base_size_map(size_t *size, orte_rmaps_base_map_t *src, orte_data_type_t type); int orte_rmaps_base_size_map(size_t *size, orte_job_map_t *src, orte_data_type_t type);
int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest, int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type); orte_std_cntr_t *num_vals, orte_data_type_t type);
/* MAPPED_PROC */ /* MAPPED_PROC */
int orte_rmaps_base_copy_mapped_proc(orte_rmaps_base_proc_t **dest, orte_rmaps_base_proc_t *src, orte_data_type_t type); int orte_rmaps_base_copy_mapped_proc(orte_mapped_proc_t **dest, orte_mapped_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_mapped_proc(orte_rmaps_base_proc_t *value1, orte_rmaps_base_proc_t *value2, orte_data_type_t type); int orte_rmaps_base_compare_mapped_proc(orte_mapped_proc_t *value1, orte_mapped_proc_t *value2, orte_data_type_t type);
int orte_rmaps_base_pack_mapped_proc(orte_buffer_t *buffer, void *src, int orte_rmaps_base_pack_mapped_proc(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type); orte_std_cntr_t num_vals, orte_data_type_t type);
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_rmaps_base_proc_t *src, orte_data_type_t type); int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_mapped_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_rmaps_base_proc_t *src, orte_data_type_t type); int orte_rmaps_base_size_mapped_proc(size_t *size, orte_mapped_proc_t *src, orte_data_type_t type);
int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest, int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type); orte_std_cntr_t *num_vals, orte_data_type_t type);
/* MAPPED_NODE */ /* MAPPED_NODE */
int orte_rmaps_base_copy_mapped_node(orte_rmaps_base_node_t **dest, orte_rmaps_base_node_t *src, orte_data_type_t type); int orte_rmaps_base_copy_mapped_node(orte_mapped_node_t **dest, orte_mapped_node_t *src, orte_data_type_t type);
int orte_rmaps_base_compare_mapped_node(orte_rmaps_base_node_t *value1, orte_rmaps_base_node_t *value2, orte_data_type_t type); int orte_rmaps_base_compare_mapped_node(orte_mapped_node_t *value1, orte_mapped_node_t *value2, orte_data_type_t type);
int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src, int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type); orte_std_cntr_t num_vals, orte_data_type_t type);
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_rmaps_base_node_t *src, orte_data_type_t type); int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_node_t *src, orte_data_type_t type);
int orte_rmaps_base_size_mapped_node(size_t *size, orte_rmaps_base_node_t *src, orte_data_type_t type); int orte_rmaps_base_size_mapped_node(size_t *size, orte_mapped_node_t *src, orte_data_type_t type);
int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest, int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type); orte_std_cntr_t *num_vals, orte_data_type_t type);

Просмотреть файл

@ -69,6 +69,8 @@ orte_rmaps_base_component_t mca_rmaps_proxy_component = {
*/ */
static orte_rmaps_base_module_t orte_rmaps_proxy = { static orte_rmaps_base_module_t orte_rmaps_proxy = {
orte_rmaps_proxy_map, orte_rmaps_proxy_map,
orte_rmaps_base_get_job_map,
orte_rmaps_base_get_node_map,
orte_rmaps_proxy_finalize orte_rmaps_proxy_finalize
}; };

Просмотреть файл

@ -58,6 +58,18 @@
*/ */
typedef int (*orte_rmaps_base_module_map_fn_t)(orte_jobid_t job, char *desired_mapper); typedef int (*orte_rmaps_base_module_map_fn_t)(orte_jobid_t job, char *desired_mapper);
/**
* Get the map of a job from the registry
*/
typedef int (*orte_rmaps_base_module_get_job_map_fn_t)(orte_job_map_t **map, orte_jobid_t job);
/**
* Get the map for a job on a specific node from the registry. Providing a jobid of
* ORTE_JOBID_WILDCARD will return the map of all processes on that node
*/
typedef int (*orte_rmaps_base_module_get_node_map_fn_t)(orte_mapped_node_t **node, orte_cellid_t cell,
char *nodename, orte_jobid_t job);
/** /**
* Cleanup module resources. * Cleanup module resources.
*/ */
@ -67,8 +79,12 @@ typedef int (*orte_rmaps_base_module_finalize_fn_t)(void);
* rmaps module version 1.3.0 * rmaps module version 1.3.0
*/ */
struct orte_rmaps_base_module_1_3_0_t { struct orte_rmaps_base_module_1_3_0_t {
/** Maping function pointer */ /** Mapping function pointer */
orte_rmaps_base_module_map_fn_t map_job; orte_rmaps_base_module_map_fn_t map_job;
/** Get job map pointer */
orte_rmaps_base_module_get_job_map_fn_t get_job_map;
/** Node map pointer */
orte_rmaps_base_module_get_node_map_fn_t get_node_map;
/** Finalization function pointer */ /** Finalization function pointer */
orte_rmaps_base_module_finalize_fn_t finalize; orte_rmaps_base_module_finalize_fn_t finalize;
}; };

Просмотреть файл

@ -24,11 +24,7 @@
#include "orte/orte_constants.h" #include "orte/orte_constants.h"
#include "orte/mca/ns/ns_types.h" #include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h" #include "orte/mca/rmgr/rmgr_types.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/rmaps/rmaps.h"
/* /*
* General MAP types * General MAP types
@ -37,54 +33,50 @@
extern "C" { extern "C" {
#endif #endif
/**** JOB_MAP OBJECTS ***/
/*
* Mapped process info for job_map
*/
struct orte_mapped_proc_t {
opal_list_item_t super;
orte_process_name_t name; /* process name */
orte_std_cntr_t rank; /* process rank */
orte_std_cntr_t app_idx; /* index of app_context for this process */
pid_t pid;
};
typedef struct orte_mapped_proc_t orte_mapped_proc_t;
OBJ_CLASS_DECLARATION(orte_mapped_proc_t);
/* /*
* Mapping of nodes to process ranks. * Mapping of nodes to process ranks.
*/ */
struct orte_mapped_node_t {
struct orte_rmaps_base_node_t {
opal_list_item_t super; opal_list_item_t super;
orte_ras_node_t* node; orte_cellid_t cell; /* cell where this node is located */
opal_list_t node_procs; /* list of rmaps_base_proc_t */ char *nodename; /* name of node */
}; char *username;
typedef struct orte_rmaps_base_node_t orte_rmaps_base_node_t; orte_process_name_t *daemon; /* name of the daemon on this node
* NULL => daemon not assigned yet
OBJ_CLASS_DECLARATION(orte_rmaps_base_node_t);
/*
* Mapping of a process rank to a specific node.
*/ */
bool oversubscribed; /* whether or not the #procs > #processors */
struct orte_rmaps_base_proc_t { opal_list_t procs; /* list of mapped_proc objects on this node */
opal_list_item_t super;
char *app; /* name of executable */
orte_rmaps_base_node_t* proc_node;
orte_process_name_t proc_name;
orte_std_cntr_t proc_rank;
pid_t pid; /* PLS-assigned pid */
pid_t local_pid; /* pid found by local process */
}; };
typedef struct orte_rmaps_base_proc_t orte_rmaps_base_proc_t; typedef struct orte_mapped_node_t orte_mapped_node_t;
OBJ_CLASS_DECLARATION(orte_mapped_node_t);
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_proc_t);
/* /*
* Structure that represents the mapping of an application to an * Structure that represents the mapping of a job to an
* allocated set of resources. * allocated set of resources.
*/ */
struct orte_job_map_t {
struct orte_rmaps_base_map_t { opal_object_t super;
opal_list_item_t super; orte_jobid_t job;
orte_app_context_t *app; orte_std_cntr_t num_apps; /* number of app_contexts */
orte_rmaps_base_proc_t** procs; orte_app_context_t **apps; /* the array of app_contexts for this job */
orte_std_cntr_t num_procs; opal_list_t nodes; /* list of mapped_node_t */
opal_list_t nodes; /* list of rmaps_base_node_t */
}; };
typedef struct orte_rmaps_base_map_t orte_rmaps_base_map_t; typedef struct orte_job_map_t orte_job_map_t;
OBJ_CLASS_DECLARATION(orte_job_map_t);
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_map_t);
#if defined(c_plusplus) || defined(__cplusplus) #if defined(c_plusplus) || defined(__cplusplus)
} }

Просмотреть файл

@ -31,6 +31,7 @@
#include "opal/mca/base/mca_base_param.h" #include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "opal/util/trace.h"
#include "opal/util/show_help.h" #include "opal/util/show_help.h"
#include "opal/util/argv.h" #include "opal/util/argv.h"
@ -56,7 +57,7 @@ static opal_list_t fully_used_nodes;
*/ */
static int map_app_by_node( static int map_app_by_node(
orte_app_context_t* app, orte_app_context_t* app,
orte_rmaps_base_map_t* map, orte_job_map_t* map,
orte_jobid_t jobid, orte_jobid_t jobid,
orte_vpid_t vpid_start, orte_vpid_t vpid_start,
opal_list_t* nodes, opal_list_t* nodes,
@ -67,6 +68,7 @@ static int map_app_by_node(
opal_list_item_t *next; opal_list_item_t *next;
orte_ras_node_t *node; orte_ras_node_t *node;
OPAL_TRACE(2);
/* This loop continues until all procs have been mapped or we run /* This loop continues until all procs have been mapped or we run
out of resources. We determine that we have "run out of out of resources. We determine that we have "run out of
@ -110,7 +112,7 @@ static int map_app_by_node(
/* Allocate a slot on this node */ /* Allocate a slot on this node */
node = (orte_ras_node_t*) cur_node_item; node = (orte_ras_node_t*) cur_node_item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, num_alloc, if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, app->idx,
nodes, max_used_nodes))) { nodes, max_used_nodes))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
@ -121,8 +123,6 @@ static int map_app_by_node(
cur_node_item = next; cur_node_item = next;
} }
map->num_procs = num_alloc;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -133,7 +133,7 @@ static int map_app_by_node(
*/ */
static int map_app_by_slot( static int map_app_by_slot(
orte_app_context_t* app, orte_app_context_t* app,
orte_rmaps_base_map_t* map, orte_job_map_t* map,
orte_jobid_t jobid, orte_jobid_t jobid,
orte_vpid_t vpid_start, orte_vpid_t vpid_start,
opal_list_t* nodes, opal_list_t* nodes,
@ -145,6 +145,7 @@ static int map_app_by_slot(
orte_ras_node_t *node; orte_ras_node_t *node;
opal_list_item_t *next; opal_list_item_t *next;
OPAL_TRACE(2);
/* This loop continues until all procs have been mapped or we run /* This loop continues until all procs have been mapped or we run
out of resources. We determine that we have "run out of out of resources. We determine that we have "run out of
@ -195,7 +196,7 @@ static int map_app_by_slot(
num_slots_to_take = (node->node_slots == 0) ? 1 : node->node_slots; num_slots_to_take = (node->node_slots == 0) ? 1 : node->node_slots;
for( i = 0; i < num_slots_to_take; ++i) { for( i = 0; i < num_slots_to_take; ++i) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, num_alloc, if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(map, node, jobid, vpid_start + num_alloc, app->idx,
nodes, max_used_nodes))) { nodes, max_used_nodes))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop * really isn't an error - we just need to break from the loop
@ -223,8 +224,6 @@ static int map_app_by_slot(
} }
map->num_procs = num_alloc;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -235,11 +234,10 @@ static int map_app_by_slot(
static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore) static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
{ {
orte_app_context_t** context, *app; orte_app_context_t *app;
orte_rmaps_base_map_t* map; orte_job_map_t* map;
orte_std_cntr_t i, num_context; orte_std_cntr_t i;
opal_list_t master_node_list, mapped_node_list, max_used_nodes, *working_node_list; opal_list_t master_node_list, mapped_node_list, max_used_nodes, *working_node_list;
opal_list_t mapping;
opal_list_item_t *item, *item2; opal_list_item_t *item, *item2;
orte_ras_node_t *node, *node2; orte_ras_node_t *node, *node2;
orte_vpid_t vpid_start, job_vpid_start=0; orte_vpid_t vpid_start, job_vpid_start=0;
@ -247,8 +245,20 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
int rc; int rc;
bool bynode = true, modify_app_context = false; bool bynode = true, modify_app_context = false;
OPAL_TRACE(1);
/* create the map object */
map = OBJ_NEW(orte_job_map_t);
if (NULL == map) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* set the jobid */
map->job = jobid;
/* query for the application context and allocated nodes */ /* query for the application context and allocated nodes */
if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &context, &num_context))) { if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &(map->apps), &(map->num_apps)))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
@ -271,11 +281,6 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
return rc; return rc;
} }
/* construct a mapping for the job - the list will hold mappings for each
* application context
*/
OBJ_CONSTRUCT(&mapping, opal_list_t);
/** initialize the cur_node_item to point to the first node in the list */ /** initialize the cur_node_item to point to the first node in the list */
cur_node_item = opal_list_get_first(&master_node_list); cur_node_item = opal_list_get_first(&master_node_list);
@ -298,30 +303,20 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
/** construct a list to hold any nodes involved in a user-specified mapping */ /** construct a list to hold any nodes involved in a user-specified mapping */
OBJ_CONSTRUCT(&mapped_node_list, opal_list_t); OBJ_CONSTRUCT(&mapped_node_list, opal_list_t);
for(i=0; i<num_context; i++) { for(i=0; i < map->num_apps; i++) {
app = context[i]; app = map->apps[i];
/** if the number of processes wasn't specified, then we know there can be only /** if the number of processes wasn't specified, then we know there can be only
* one app_context allowed in the launch, and that we are to launch it across * one app_context allowed in the launch, and that we are to launch it across
* all available slots. We'll double-check the single app_context rule first * all available slots. We'll double-check the single app_context rule first
*/ */
if (0 == app->num_procs && 1 < num_context) { if (0 == app->num_procs && 1 < map->num_apps) {
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np", opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np",
true, num_context, NULL); true, map->num_apps, NULL);
ORTE_ERROR_LOG(ORTE_ERR_INVALID_NUM_PROCS); ORTE_ERROR_LOG(ORTE_ERR_INVALID_NUM_PROCS);
return ORTE_ERR_INVALID_NUM_PROCS; return ORTE_ERR_INVALID_NUM_PROCS;
} }
/** create a map for this app_context */
map = OBJ_NEW(orte_rmaps_base_map_t);
if(NULL == map) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/** add it to the list of mappings for the job */
opal_list_append(&mapping, &map->super);
if ( 0 < app->num_map ) { if ( 0 < app->num_map ) {
/** If the user has specified a mapping for this app_context, then we /** If the user has specified a mapping for this app_context, then we
* create a working node list that contains only those nodes. * create a working node list that contains only those nodes.
@ -355,15 +350,6 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
} }
} }
map->app = app;
map->procs = (orte_rmaps_base_proc_t**)malloc(sizeof(orte_rmaps_base_proc_t*) * app->num_procs);
if(NULL == map->procs) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* allocate a vpid range for this app within the job */ /* allocate a vpid range for this app within the job */
if(ORTE_SUCCESS != (rc = orte_ns.reserve_range(jobid, app->num_procs, &vpid_start))) { if(ORTE_SUCCESS != (rc = orte_ns.reserve_range(jobid, app->num_procs, &vpid_start))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -467,7 +453,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
} }
/* save mapping to the registry */ /* save mapping to the registry */
if(ORTE_SUCCESS != (rc = orte_rmaps_base_set_map(jobid, &mapping))) { if(ORTE_SUCCESS != (rc = orte_rmaps_base_put_job_map(map))) {
goto cleanup; goto cleanup;
} }
@ -493,7 +479,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore)
processes processes
*/ */
if (modify_app_context) { if (modify_app_context) {
if (ORTE_SUCCESS != (rc = orte_rmgr.store_app_context(jobid, context, 1))) { if (ORTE_SUCCESS != (rc = orte_rmgr.store_app_context(jobid, map->apps, 1))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
} }
@ -505,11 +491,6 @@ cleanup:
} }
OBJ_DESTRUCT(&master_node_list); OBJ_DESTRUCT(&master_node_list);
while(NULL != (item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping);
OBJ_DESTRUCT(&max_used_nodes); OBJ_DESTRUCT(&max_used_nodes);
OBJ_DESTRUCT(&fully_used_nodes); OBJ_DESTRUCT(&fully_used_nodes);
OBJ_DESTRUCT(&mapped_node_list); OBJ_DESTRUCT(&mapped_node_list);
@ -526,6 +507,8 @@ static int orte_rmaps_rr_finalize(void)
orte_rmaps_base_module_t orte_rmaps_round_robin_module = { orte_rmaps_base_module_t orte_rmaps_round_robin_module = {
orte_rmaps_rr_map, orte_rmaps_rr_map,
orte_rmaps_base_get_job_map,
orte_rmaps_base_get_node_map,
orte_rmaps_rr_finalize orte_rmaps_rr_finalize
}; };

Просмотреть файл

@ -62,6 +62,7 @@
#define ORTE_NODE_ALLOC_KEY "orte-node-alloc" #define ORTE_NODE_ALLOC_KEY "orte-node-alloc"
#define ORTE_NODE_BOOTPROXY_KEY "orte-node-bootproxy" #define ORTE_NODE_BOOTPROXY_KEY "orte-node-bootproxy"
#define ORTE_NODE_USERNAME_KEY "orte-node-username" #define ORTE_NODE_USERNAME_KEY "orte-node-username"
#define ORTE_NODE_OVERSUBSCRIBED_KEY "orte-node-oversubscribed"
#define ORTE_JOB_APP_CONTEXT_KEY "orte-job-app-context" #define ORTE_JOB_APP_CONTEXT_KEY "orte-job-app-context"
#define ORTE_JOB_SLOTS_KEY "orte-job-slots" /**< number of procs in job */ #define ORTE_JOB_SLOTS_KEY "orte-job-slots" /**< number of procs in job */
#define ORTE_JOB_VPID_START_KEY "orte-job-vpid-start" #define ORTE_JOB_VPID_START_KEY "orte-job-vpid-start"

Просмотреть файл

@ -66,7 +66,7 @@ extern char **environ;
#include "opal/mca/base/base.h" #include "opal/mca/base/base.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr_types.h" #include "orte/mca/rmgr/rmgr_types.h"
#include "orte/mca/rmaps/base/rmaps_private.h" #include "orte/mca/rmaps/rmaps.h"
#include "orte/runtime/runtime.h" #include "orte/runtime/runtime.h"
#include "totalview.h" #include "totalview.h"
@ -333,8 +333,11 @@ void orte_totalview_init_before_spawn(void)
*/ */
void orte_totalview_init_after_spawn(orte_jobid_t jobid) void orte_totalview_init_after_spawn(orte_jobid_t jobid)
{ {
opal_list_t list_of_resource_maps; orte_job_map_t *map;
opal_list_item_t *item; opal_list_item_t *item, *item2;
orte_mapped_node_t *node;
orte_mapped_proc_t *proc;
orte_app_context_t *appctx;
orte_std_cntr_t i; orte_std_cntr_t i;
int rc; int rc;
@ -364,23 +367,18 @@ void orte_totalview_init_after_spawn(orte_jobid_t jobid)
MPIR_debug_state = 1; MPIR_debug_state = 1;
OBJ_CONSTRUCT(&list_of_resource_maps, opal_list_t); /* Get the resource map for this job */
/* Get a list of the resource maps for this job */ rc = orte_rmaps.get_job_map(&map, jobid);
rc = orte_rmaps_base_get_map(jobid, &list_of_resource_maps);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
opal_output(0, "Error: Can't get list of resource maps\n"); opal_output(0, "Error: Can't get resource map\n");
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
/* find the total number of processes in the job */ /* find the total number of processes in the job */
for (item = opal_list_get_first(&list_of_resource_maps); for (i=0; i < map->num_apps; i++) {
item != opal_list_get_end(&list_of_resource_maps); MPIR_proctable_size += map->apps[i]->num_procs;
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t *map = (orte_rmaps_base_map_t*) item;
MPIR_proctable_size += map->num_procs;
} }
/* allocate MPIR_proctable */ /* allocate MPIR_proctable */
@ -389,27 +387,34 @@ void orte_totalview_init_after_spawn(orte_jobid_t jobid)
MPIR_proctable_size); MPIR_proctable_size);
if (MPIR_proctable == NULL) { if (MPIR_proctable == NULL) {
opal_output(0, "Error: Out of memory\n"); opal_output(0, "Error: Out of memory\n");
OBJ_DESTRUCT(&list_of_resource_maps); OBJ_RELEASE(map);
} }
/* initialize MPIR_proctable */ /* initialize MPIR_proctable */
for (item = opal_list_get_first(&list_of_resource_maps); i=0;
item != opal_list_get_end(&list_of_resource_maps); for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
orte_rmaps_base_map_t *map = (orte_rmaps_base_map_t*) item; node = (orte_mapped_node_t*)item;
for (i = 0; i < map->num_procs; i++) {
orte_rmaps_base_proc_t *proc = map->procs[i]; for (item2 = opal_list_get_first(&node->procs);
MPIR_proctable[i].host_name = proc->proc_node->node->node_name; item2 != opal_list_get_end(&node->procs);
item2 = opal_list_get_next(item2)) {
proc = (orte_mapped_proc_t*)item2;
appctx = map->apps[proc->app_idx];
MPIR_proctable[i].host_name = strdup(node->nodename);
MPIR_proctable[i].executable_name = MPIR_proctable[i].executable_name =
opal_os_path( false, map->app->cwd, proc->app, NULL ); opal_os_path( false, appctx->cwd, appctx->app, NULL );
MPIR_proctable[i].pid = proc->local_pid; MPIR_proctable[i].pid = proc->pid;
i++;
} }
} }
OBJ_DESTRUCT(&list_of_resource_maps); OBJ_RELEASE(map);
} }
if (orte_debug_flag) { if (orte_debug_flag) {
dump(); dump();
} }