1
1

For convenience, revise the orte_job_map_t object so it includes the vpid start/range values and the number of nodes, and revise the orte_mapped_node_t object so it includes the number of processes on that node. These values are all used in various places in the code base, where we currently re-compute them multiple times. Since these values do not change and are already being computed by the RMAPS framework, we might as well just save them for re-use.

This commit was SVN r12829.
Этот коммит содержится в:
Ralph Castain 2006-12-12 16:07:23 +00:00
родитель 337116d5fd
Коммит 3b064a624e
8 изменённых файлов: 221 добавлений и 109 удалений

Просмотреть файл

@ -55,6 +55,11 @@ int orte_rmaps_base_copy_map(orte_job_map_t **dest, orte_job_map_t *src, orte_da
/* copy data into it */
(*dest)->job = src->job;
if (NULL != src->mapping_mode) {
(*dest)->mapping_mode = strdup(src->mapping_mode);
}
(*dest)->vpid_start = src->vpid_start;
(*dest)->vpid_range = src->vpid_range;
(*dest)->num_apps = src->num_apps;
(*dest)->apps = (orte_app_context_t**)malloc(src->num_apps * sizeof(orte_app_context_t*));
@ -71,6 +76,7 @@ int orte_rmaps_base_copy_map(orte_job_map_t **dest, orte_job_map_t *src, orte_da
}
}
(*dest)->num_nodes = src->num_nodes;
for (item = opal_list_get_first(&(src->nodes));
item != opal_list_get_end(&(src->nodes));
item = opal_list_get_next(item)) {
@ -157,6 +163,7 @@ int orte_rmaps_base_copy_mapped_node(orte_mapped_node_t **dest, orte_mapped_node
(*dest)->oversubscribed = src->oversubscribed;
(*dest)->num_procs = src->num_procs;
for (item = opal_list_get_first(&(src->procs));
item != opal_list_get_end(&(src->procs));
item = opal_list_get_next(item)) {

Просмотреть файл

@ -37,7 +37,7 @@ int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type)
{
int rc;
orte_std_cntr_t i, num_nodes;
orte_std_cntr_t i;
orte_job_map_t **maps;
opal_list_item_t *item;
orte_mapped_node_t *srcnode;
@ -52,6 +52,24 @@ int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
return rc;
}
/* pack the mapping mode used to generate it */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->mapping_mode), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the starting vpid */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->vpid_start), 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the range */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->vpid_range), 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the number of app_contexts */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->num_apps), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
@ -65,14 +83,13 @@ int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
}
/* pack the number of nodes */
num_nodes = (orte_std_cntr_t)opal_list_get_size(&(maps[i]->nodes));
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &num_nodes, 1, ORTE_STD_CNTR))) {
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(maps[i]->num_nodes), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the nodes list */
if (0 < num_nodes) {
if (0 < maps[i]->num_nodes) {
for (item = opal_list_get_first(&(maps[i]->nodes));
item != opal_list_get_end(&(maps[i]->nodes));
item = opal_list_get_next(item)) {
@ -141,7 +158,7 @@ int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type)
{
int rc;
orte_std_cntr_t i, num_procs;
orte_std_cntr_t i;
orte_mapped_node_t **nodes;
opal_list_item_t *item;
orte_mapped_proc_t *srcproc;
@ -181,14 +198,13 @@ int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
}
/* pack the number of procs */
num_procs = (orte_std_cntr_t)opal_list_get_size(&(nodes[i]->procs));
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &num_procs, 1, ORTE_STD_CNTR))) {
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->num_procs), 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the procs list */
if (0 < num_procs) {
if (0 < nodes[i]->num_procs) {
for (item = opal_list_get_first(&(nodes[i]->procs));
item != opal_list_get_end(&(nodes[i]->procs));
item = opal_list_get_next(item)) {

Просмотреть файл

@ -38,7 +38,7 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_job_map_t *src,
{
char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
orte_mapped_node_t *srcnode;
orte_std_cntr_t i, num_nodes;
orte_std_cntr_t i;
opal_list_item_t *item;
int rc;
@ -52,7 +52,9 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_job_map_t *src,
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "%sMap for job: %ld\tNum app_contexts: %ld", pfx2, (long)src->job, (long)src->num_apps);
asprintf(&tmp, "%sMap for job: %ld\tGenerated by mapping mode: %s\n%s\tStarting vpid: %ld\tVpid range: %ld\tNum app_contexts: %ld",
pfx2, (long)src->job, (NULL == src->mapping_mode) ? "NULL" : src->mapping_mode,
pfx2, (long)src->vpid_start, (long)src->vpid_range, (long)src->num_apps);
asprintf(&pfx, "%s\t", pfx2);
free(pfx2);
@ -70,8 +72,7 @@ int orte_rmaps_base_print_map(char **output, char *prefix, orte_job_map_t *src,
tmp = tmp3;
}
num_nodes = (orte_std_cntr_t)opal_list_get_size(&(src->nodes));
asprintf(&tmp, "%s\n%sNum elements in nodes list: %ld", tmp3, pfx, (long)num_nodes);
asprintf(&tmp, "%s\n%sNum elements in nodes list: %ld", tmp3, pfx, (long)src->num_nodes);
for (item = opal_list_get_first(&(src->nodes));
item != opal_list_get_end(&(src->nodes));
@ -145,7 +146,6 @@ int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_n
{
int rc;
char *tmp, *tmp2, *tmp3, *pfx, *pfx2;
orte_std_cntr_t num_procs;
opal_list_item_t *item;
orte_mapped_proc_t *srcproc;
@ -173,9 +173,8 @@ int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_n
return rc;
}
num_procs = (orte_std_cntr_t)opal_list_get_size(&(src->procs));
asprintf(&tmp3, "%s\n\t%s\n%sOversubscribed: %s\tNum elements in procs list: %ld", tmp, tmp2, pfx,
(src->oversubscribed ? "True" : "False"), (long)num_procs);
(src->oversubscribed ? "True" : "False"), (long)src->num_procs);
free(tmp);
free(tmp2);

Просмотреть файл

@ -39,7 +39,7 @@ int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type)
{
int rc;
orte_std_cntr_t i, j, n, num_nodes;
orte_std_cntr_t i, j, n;
orte_job_map_t **maps;
orte_mapped_node_t *node;
@ -62,6 +62,30 @@ int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest,
return rc;
}
/* unpack the mapping mode */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->mapping_mode), &n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the starting vpid */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->vpid_start), &n, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the vpid range */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
&(maps[i]->vpid_range), &n, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the number of app_contexts */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
@ -85,12 +109,12 @@ int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest,
/* unpack the number of nodes */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &num_nodes, &n, ORTE_STD_CNTR))) {
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(maps[i]->num_nodes), &n, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
for (j=0; j < num_nodes; j++) {
for (j=0; j < maps[i]->num_nodes; j++) {
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &node, &n, ORTE_MAPPED_NODE))) {
ORTE_ERROR_LOG(rc);
@ -167,7 +191,7 @@ int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type)
{
int rc;
orte_std_cntr_t i, j, n, num_procs;
orte_std_cntr_t i, j, n;
orte_mapped_node_t **nodes;
orte_mapped_proc_t *srcproc;
@ -224,21 +248,19 @@ int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
/* unpack the number of procs */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &num_procs, &n, ORTE_STD_CNTR))) {
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(nodes[i]->num_procs), &n, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if we have some, unpack them */
if (0 < num_procs) {
for (j=0; j < num_procs; j++) {
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &srcproc, &n, ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc);
return rc;
}
opal_list_append(&(nodes[i]->procs), &srcproc->super);
for (j=0; j < nodes[i]->num_procs; j++) {
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &srcproc, &n, ORTE_MAPPED_PROC))) {
ORTE_ERROR_LOG(rc);
return rc;
}
opal_list_append(&(nodes[i]->procs), &srcproc->super);
}
}

Просмотреть файл

@ -43,7 +43,10 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
{
orte_job_map_t *mapping;
orte_mapped_proc_t *proc;
orte_mapped_node_t *mnode;
opal_list_item_t *item;
orte_cellid_t *cellptr, cell=ORTE_CELLID_INVALID;
orte_vpid_t *vptr;
orte_std_cntr_t *sptr;
bool *bptr, oversub=false;
pid_t *pidptr;
@ -64,6 +67,8 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
ORTE_NODE_NAME_KEY,
ORTE_NODE_USERNAME_KEY,
ORTE_NODE_OVERSUBSCRIBED_KEY,
ORTE_JOB_VPID_START_KEY,
ORTE_JOB_VPID_RANGE_KEY,
NULL
};
@ -79,7 +84,7 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* store the jobid */
/* set the jobid */
mapping->job = jobid;
/* get the job segment name */
@ -119,89 +124,123 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
value = values[v];
node_name = NULL;
proc = OBJ_NEW(orte_mapped_proc_t);
if(NULL == proc) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
if (0 == strcmp(value->tokens[0], ORTE_JOB_GLOBALS)) {
/* this came from the job_globals container, so look for the related values */
for (kv=0; kv < value->cnt; kv++) {
if(strcmp(value->keyvals[kv]->key, ORTE_JOB_VPID_START_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, value->keyvals[kv]->value, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
mapping->vpid_start = *vptr;
continue;
}
if(strcmp(value->keyvals[kv]->key, ORTE_JOB_VPID_RANGE_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, value->keyvals[kv]->value, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
mapping->vpid_range = *vptr;
continue;
}
}
}
for(kv = 0; kv<value->cnt; kv++) {
keyval = value->keyvals[kv];
else {
/* this came from a process container */
proc = OBJ_NEW(orte_mapped_proc_t);
if(NULL == proc) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
for(kv = 0; kv<value->cnt; kv++) {
keyval = value->keyvals[kv];
if(strcmp(keyval->key, ORTE_PROC_RANK_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->rank = *sptr;
continue;
}
proc->rank = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
if(strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, keyval->value, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->name = *pptr;
continue;
}
proc->name = *pptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->app_idx = *sptr;
continue;
}
proc->app_idx = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
if(strcmp(keyval->key, ORTE_PROC_LOCAL_PID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
proc->pid = *pidptr;
continue;
}
proc->pid = *pidptr;
continue;
}
if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cellptr, keyval->value, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cellptr, keyval->value, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
cell = *cellptr;
continue;
}
cell = *cellptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
continue;
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_USERNAME_KEY) == 0) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&username, keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
if(strcmp(keyval->key, ORTE_NODE_USERNAME_KEY) == 0) {
/* use the dss.copy function here to protect us against zero-length strings */
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&username, keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
continue;
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_OVERSUBSCRIBED_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, keyval->value, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
if(strcmp(keyval->key, ORTE_NODE_OVERSUBSCRIBED_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, keyval->value, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
oversub = *bptr;
continue;
}
oversub = *bptr;
continue;
}
/* store this process in the map */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(mapping, cell, node_name, username, oversub, proc))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (NULL != node_name) free(node_name);
}
/* store this process in the map */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(mapping, cell, node_name, username, oversub, proc))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (NULL != node_name) free(node_name);
}
/* compute and save convenience values */
mapping->num_nodes = opal_list_get_size(&mapping->nodes);
for (item = opal_list_get_first(&mapping->nodes);
item != opal_list_get_end(&mapping->nodes);
item = opal_list_get_next(item)) {
mnode = (orte_mapped_node_t*)item;
mnode->num_procs = opal_list_get_size(&mnode->procs);
}
/* all done */
*map = mapping;
return ORTE_SUCCESS;
@ -303,10 +342,10 @@ int orte_rmaps_base_put_job_map(orte_job_map_t *map)
return rc;
}
/** setup the last value in the array to update the INIT counter */
/** setup the last value in the array to store the vpid start/range and update the INIT counter */
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[num_procs]),
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
segment, 1, 1))) {
segment, 3, 1))) {
ORTE_ERROR_LOG(rc);
free(values);
free(segment);
@ -316,6 +355,14 @@ int orte_rmaps_base_put_job_map(orte_job_map_t *map)
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[num_procs]->keyvals[1]), ORTE_JOB_VPID_START_KEY, ORTE_VPID, &map->vpid_start))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[num_procs]->keyvals[2]), ORTE_JOB_VPID_RANGE_KEY, ORTE_VPID, &map->vpid_range))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
values[num_procs]->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* counter is in the job's globals container */

Просмотреть файл

@ -66,6 +66,7 @@ static void orte_rmaps_mapped_node_construct(orte_mapped_node_t* node)
node->username = NULL;
node->daemon = NULL;
node->oversubscribed = false;
node->num_procs = 0;
OBJ_CONSTRUCT(&node->procs, opal_list_t);
}
@ -103,8 +104,12 @@ OBJ_CLASS_INSTANCE(orte_mapped_node_t,
static void orte_rmaps_job_map_construct(orte_job_map_t* map)
{
map->job = ORTE_JOBID_INVALID;
map->mapping_mode = NULL;
map->vpid_start = ORTE_VPID_INVALID;
map->vpid_range = 0;
map->num_apps = 0;
map->apps = NULL;
map->num_nodes = 0;
OBJ_CONSTRUCT(&map->nodes, opal_list_t);
}
@ -113,6 +118,8 @@ static void orte_rmaps_job_map_destruct(orte_job_map_t* map)
orte_std_cntr_t i=0;
opal_list_item_t* item;
if (NULL != map->mapping_mode) free(map->mapping_mode);
for(i=0; i < map->num_apps; i++) {
if (NULL != map->apps[i]) OBJ_RELEASE(map->apps[i]);
}

Просмотреть файл

@ -69,7 +69,10 @@ struct orte_mapped_node_t {
orte_process_name_t *daemon; /* name of the daemon on this node
* NULL => daemon not assigned yet
*/
bool oversubscribed; /* whether or not the #procs > #processors */
bool oversubscribed; /* whether or not the #procs > #process slots on this node */
orte_std_cntr_t num_procs; /* #procs on this node - just the length of the procs list, but
* stored here so we don't have to keep recomputing it elsewhere
*/
opal_list_t procs; /* list of mapped_proc objects on this node */
};
typedef struct orte_mapped_node_t orte_mapped_node_t;
@ -82,8 +85,14 @@ OBJ_CLASS_DECLARATION(orte_mapped_node_t);
struct orte_job_map_t {
opal_object_t super;
orte_jobid_t job;
char *mapping_mode;
orte_vpid_t vpid_start;
orte_vpid_t vpid_range;
orte_std_cntr_t num_apps; /* number of app_contexts */
orte_app_context_t **apps; /* the array of app_contexts for this job */
orte_std_cntr_t num_nodes; /* #nodes in this map - just the length of the nodes list, but
* stored here so we don't have to keep recomputing it elsewhere
*/
opal_list_t nodes; /* list of mapped_node_t */
};
typedef struct orte_job_map_t orte_job_map_t;

Просмотреть файл

@ -330,8 +330,9 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes)
opal_list_t master_node_list, mapped_node_list, max_used_nodes, *working_node_list;
opal_list_item_t *item, *item2;
orte_ras_node_t *node, *node2;
orte_mapped_node_t *mnode;
char *save_bookmark;
orte_vpid_t vpid_start, job_vpid_start=0;
orte_vpid_t vpid_start;
orte_std_cntr_t num_procs = 0, total_num_slots, mapped_num_slots, num_nodes, num_slots;
int rc;
bool modify_app_context = false;
@ -509,7 +510,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes)
/** save the initial starting vpid for later */
if (0 == i) {
job_vpid_start = vpid_start;
map->vpid_start = vpid_start;
}
/** track the total number of processes we mapped */
@ -646,17 +647,21 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes)
}
/* compute and save convenience values */
map->vpid_range = num_procs;
map->num_nodes = opal_list_get_size(&map->nodes);
for (item = opal_list_get_first(&map->nodes);
item != opal_list_get_end(&map->nodes);
item = opal_list_get_next(item)) {
mnode = (orte_mapped_node_t*)item;
mnode->num_procs = opal_list_get_size(&mnode->procs);
}
/* save mapping to the registry */
if(ORTE_SUCCESS != (rc = orte_rmaps_base_put_job_map(map))) {
goto cleanup;
}
/* save vpid start/range on the job segment */
if (ORTE_SUCCESS != (rc = orte_rmgr.set_vpid_range(jobid, job_vpid_start, num_procs))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/** join the master_node_list and fully_used_list so that all info gets updated */
opal_list_join(&master_node_list, opal_list_get_end(&master_node_list), &fully_used_nodes);