1
1

Complete the attribute management functions.

Modify the mapper to better bookmark its stopping place each time, and to pick up the next time from there. This needs to be validated on a multi-node system.

Fix a major memory corruption problem in the registry put/get functions, which were freeing the same memory more than once. Not sure how valgrind missed this one, though it only occurred in specific circumstances (such as comm_spawn).

This commit was SVN r12179.
Этот коммит содержится в:
Ralph Castain 2006-10-18 20:02:16 +00:00
родитель 2036bf5c3c
Коммит d0eb7d7216
15 изменённых файлов: 531 добавлений и 144 удалений

Просмотреть файл

@ -47,6 +47,7 @@
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras_types.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rmgr/base/base.h"
#include "orte/mca/smr/smr_types.h"
@ -357,6 +358,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
orte_app_context_t **apps=NULL;
opal_list_t attributes;
opal_list_item_t *item;
/* parse the info object */
@ -548,7 +550,18 @@ ompi_comm_start_processes(int count, char **array_of_commands,
* don't go get one just for them
*/
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RAS_USE_PARENT_ALLOCATION,
ORTE_JOBID, &(orte_process_info.my_name->jobid)))) {
ORTE_JOBID, &(orte_process_info.my_name->jobid),
ORTE_RMGR_ATTR_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&attributes);
opal_progress_event_decrement();
return MPI_ERR_SPAWN;
}
/* tell the RTE that we want the children mapped the same way as their parent */
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RMAPS_USE_PARENT_PLAN,
ORTE_JOBID, &(orte_process_info.my_name->jobid),
ORTE_RMGR_ATTR_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&attributes);
opal_progress_event_decrement();
@ -564,6 +577,9 @@ ompi_comm_start_processes(int count, char **array_of_commands,
/* clean up */
opal_progress_event_decrement();
while (NULL != (item = opal_list_remove_first(&attributes))) OBJ_RELEASE(item);
OBJ_DESTRUCT(&attributes);
for ( i=0; i<count; i++) {
OBJ_RELEASE(apps[i]);
}

Просмотреть файл

@ -110,7 +110,7 @@ int orte_gpr_replica_recv_put_cmd(orte_buffer_t *buffer, orte_buffer_t *answer)
}
}
free(itags);
if (NULL != itags) free(itags);
itags = NULL;
}
@ -127,7 +127,7 @@ int orte_gpr_replica_recv_put_cmd(orte_buffer_t *buffer, orte_buffer_t *answer)
OBJ_RELEASE(values[i]);
}
}
free(values);
if (NULL != values) free(values);
}
if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &ret, 1, ORTE_INT))) {
@ -267,15 +267,17 @@ int orte_gpr_replica_recv_get_cmd(orte_buffer_t *input_buffer,
if (NULL != tokens) {
for (i=0; i<num_tokens; i++) {
free(tokens[i]);
tokens[i] = NULL;
}
free(tokens);
if (NULL != tokens) free(tokens);
}
if (NULL != keys) {
for (i=0; i<num_keys; i++) {
free(keys[i]);
keys[i] = NULL;
}
free(keys);
if (NULL != keys) free(keys);
}
if (NULL != tokentags) {
@ -291,7 +293,7 @@ int orte_gpr_replica_recv_get_cmd(orte_buffer_t *input_buffer,
if (NULL != values[i])
OBJ_RELEASE(values[i]);
}
free(values);
if (NULL != values) free(values);
}
/* pack response code */
@ -482,15 +484,17 @@ int orte_gpr_replica_recv_get_conditional_cmd(orte_buffer_t *input_buffer,
if (NULL != tokens) {
for (i=0; i<num_tokens; i++) {
free(tokens[i]);
tokens[i] = NULL;
}
free(tokens);
if (NULL != tokens) free(tokens);
}
if (NULL != keys) {
for (i=0; i<num_keys; i++) {
free(keys[i]);
keys[i] = NULL;
}
free(keys);
if (NULL != keys) free(keys);
}
if (NULL != tokentags) {
@ -506,7 +510,7 @@ int orte_gpr_replica_recv_get_conditional_cmd(orte_buffer_t *input_buffer,
if (NULL != values[i])
OBJ_RELEASE(values[i]);
}
free(values);
if (NULL != values) free(values);
}
if (NULL != conds) {
@ -515,7 +519,7 @@ int orte_gpr_replica_recv_get_conditional_cmd(orte_buffer_t *input_buffer,
OBJ_RELEASE(conds[i]);
}
}
free(conds);
if (NULL != conds) free(conds);
}
if (NULL != conditions) {
for (i=0; i < num_conditions; i++) {
@ -523,7 +527,7 @@ int orte_gpr_replica_recv_get_conditional_cmd(orte_buffer_t *input_buffer,
OBJ_RELEASE(conditions[i]);
}
}
free(conditions);
if (NULL != conditions) free(conditions);
}
/* pack response code */

Просмотреть файл

@ -49,68 +49,106 @@ int orte_rmaps_base_map_job(orte_jobid_t job, opal_list_t *attributes)
orte_rmaps_base_module_t *module=NULL;
orte_attribute_t *attr;
char *desired_mapper;
opal_list_t working_attrs;
opal_list_item_t *item;
orte_jobid_t *jptr, parent_job=ORTE_JOBID_INVALID;
int rc;
/* check the attributes to see if anything in the environment
* has been overridden. If not, then install the environment
* values to correctly control the behavior of the RMAPS component.
*/
if (NULL == (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_MAP_POLICY))) {
/* was NOT provided - use what was set by the environment */
if (orte_rmaps_base.bynode) {
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_MAP_POLICY,
ORTE_STRING, "bynode"))) {
ORTE_ERROR_LOG(rc);
return rc;
}
} else {
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_MAP_POLICY,
ORTE_STRING, "byslot"))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
if (NULL == (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_PERNODE))) {
/* was NOT provided - add it if it was set by the environment. Note that this
* attribute only cares if it exists - its value is irrelevant and hence
* not provided
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_USE_PARENT_PLAN))) {
/* was provided - lookup the specified jobid's mapping plan and use it. This
* includes the FULL list of mapping attributes that were used. We will
* subsequently override those settings with anything that was specifically
* provided by the caller
*/
if (orte_rmaps_base.per_node) {
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_PERNODE,
ORTE_UNDEF, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
parent_job = *jptr;
/* lookup that job's mapping policy */
OBJ_CONSTRUCT(&working_attrs, opal_list_t);
if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_mapping_plan(parent_job, &working_attrs))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&working_attrs);
return rc;
}
/* go through the parent policy and "fill" anything that was missing in the
* list of attributes provided. We specifically don't overwrite anything provided
* by the caller - the caller is allowed to "override" any specific attribute
* of the parent's plan
*/
if (ORTE_SUCCESS != (rc = orte_rmgr.merge_attributes(attributes, &working_attrs,
ORTE_RMGR_ATTR_NO_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&working_attrs);
return rc;
}
/* clean up */
while (NULL != (item = opal_list_remove_first(&working_attrs))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&working_attrs);
}
/* check the mapping policy */
if (orte_rmaps_base.bynode) {
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_MAP_POLICY,
ORTE_STRING, "bynode",
ORTE_RMGR_ATTR_NO_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
} else {
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_MAP_POLICY,
ORTE_STRING, "byslot",
ORTE_RMGR_ATTR_NO_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
/* check pernode - add it if it was set by the environment. Note that this
* attribute only cares if it exists - its value is irrelevant and hence
* not provided
*/
if (orte_rmaps_base.per_node) {
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_PERNODE,
ORTE_UNDEF, NULL,
ORTE_RMGR_ATTR_NO_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
if (NULL == (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_NO_USE_LOCAL))) {
/* was NOT provided - add it if it was set by the environment. Note that this
* attribute only cares if it exists - its value is irrelevant and hence
* not provided
*/
if (orte_rmaps_base.no_use_local) {
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_NO_USE_LOCAL,
ORTE_UNDEF, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* check no_local - add it if it was set by the environment. Note that this
* attribute only cares if it exists - its value is irrelevant and hence
* not provided
*/
if (orte_rmaps_base.no_use_local) {
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_NO_USE_LOCAL,
ORTE_UNDEF, NULL,
ORTE_RMGR_ATTR_NO_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
if (NULL == (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_NO_OVERSUB))) {
/* was NOT provided - add it if it was set by the environment. Note that this
* attribute only cares if it exists - its value is irrelevant and hence
* not provided
*/
if (!orte_rmaps_base.oversubscribe) {
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_NO_OVERSUB,
ORTE_UNDEF, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* check no-oversubscribe - add it if it was set by the environment. Note that this
* attribute only cares if it exists - its value is irrelevant and hence
* not provided
*/
if (!orte_rmaps_base.oversubscribe) {
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_NO_OVERSUB,
ORTE_UNDEF, NULL,
ORTE_RMGR_ATTR_NO_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
@ -134,11 +172,34 @@ int orte_rmaps_base_map_job(orte_jobid_t job, opal_list_t *attributes)
return ORTE_ERR_NOT_FOUND;
}
/* go ahead and map the job */
if (ORTE_SUCCESS != (rc = module->map_job(job, attributes))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* store the mapping plan in case we need it later. We need to do this AFTER
* the mapping component finishes in case the component added/modified the
* attributes. The component should, at the least, have updated the
* attribute indicating where it stopped so that any subsequent mappings by
* child jobs can know where to start
*/
if (ORTE_SUCCESS != (rc = orte_rmaps_base_store_mapping_plan(job, attributes))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if we were using a parent policy, then we need to update that job's info
* on where we finished mapping. The mapping components provide that info
* via the attributes
*/
if (ORTE_JOBID_INVALID != parent_job) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_update_mapping_state(parent_job, attributes))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -43,14 +43,14 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
{
orte_job_map_t *mapping;
orte_mapped_proc_t *proc;
orte_cellid_t *cellptr, cell;
orte_cellid_t *cellptr, cell=ORTE_CELLID_INVALID;
orte_std_cntr_t *sptr;
bool *bptr, oversub;
bool *bptr, oversub=false;
pid_t *pidptr;
orte_process_name_t *pptr;
char *segment;
char *node_name;
char *username;
char *node_name=NULL;
char *username=NULL;
orte_gpr_value_t **values, *value;
orte_gpr_keyval_t* keyval;
orte_std_cntr_t v, kv, num_values;
@ -411,3 +411,196 @@ cleanup:
return rc;
}
/**
 * Store a job's mapping plan on the registry.
 *
 * Mapping plans are associated with a job - hence, they are stored in the
 * job's container on the ORTE_JOBINFO_SEGMENT. Only the RMAPS-specific
 * attributes named in the table below are copied from the list; any other
 * entry on the list is ignored.
 *
 * @param job        The job whose container is to receive the plan.
 * @param attr_list  The list of attributes to be scanned.
 *
 * @retval ORTE_SUCCESS  The plan was stored, or none of the RMAPS-specific
 *                       attributes were on the list so nothing had to be stored.
 * @retval ORTE_ERROR    The error code returned by the registry or schema
 *                       call that failed.
 */
int orte_rmaps_base_store_mapping_plan(orte_jobid_t job, opal_list_t *attr_list)
{
    int rc;
    orte_attribute_t *attr;
    orte_gpr_value_t *value;
    orte_std_cntr_t i, j, num_attrs_found, num_tokens;
    char *attrs[] = {
        ORTE_RMAPS_MAP_POLICY,
        ORTE_RMAPS_PERNODE,
        ORTE_RMAPS_NO_USE_LOCAL,
        ORTE_RMAPS_NO_OVERSUB,
        ORTE_RMAPS_DESIRED_MAPPER,
        ORTE_RMAPS_USE_PARENT_PLAN,
        ORTE_RMAPS_BOOKMARK
    };
    orte_std_cntr_t num_attrs_defd;

    OPAL_TRACE(2);

    num_attrs_defd = sizeof(attrs)/sizeof(char*);

    /* count the number of attributes we will need to store */
    num_attrs_found = 0;
    for (i=0; i < num_attrs_defd; i++) {
        if (NULL != orte_rmgr.find_attribute(attr_list, attrs[i])) num_attrs_found++;
    }

    /* if nothing found, then nothing to do! */
    if (0 == num_attrs_found) return ORTE_SUCCESS;

    /* setup to store the found values */
    if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value,
                                                    ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
                                                    ORTE_JOBINFO_SEGMENT, num_attrs_found, 0))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* setup the tokens to point to this job's container */
    if (ORTE_SUCCESS != (rc = orte_schema.get_job_tokens(&(value->tokens), &num_tokens, job))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(value);
        return rc;
    }
    /* the value was created with a token count of zero - record how many
     * tokens were actually attached so the count agrees with the array.
     * NOTE(review): relies on orte_gpr_value_t carrying a num_tokens field
     * next to tokens - confirm against gpr_types.h
     */
    value->num_tokens = num_tokens;

    /* copy the data that is to be stored */
    for (i=0, j=0; i < num_attrs_defd; i++) {
        if (NULL == (attr = orte_rmgr.find_attribute(attr_list, attrs[i]))) {
            continue;
        }
        if (NULL != attr->value) {
            rc = orte_gpr.create_keyval(&(value->keyvals[j]), attr->key,
                                        attr->value->type, attr->value->data);
        } else {
            /* existence-only attributes are added with ORTE_UNDEF and no data,
             * so there may be no value object to read the type from.
             * NOTE(review): confirm whether create_keyval leaves the value
             * NULL for ORTE_UNDEF
             */
            rc = orte_gpr.create_keyval(&(value->keyvals[j]), attr->key,
                                        ORTE_UNDEF, NULL);
        }
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(value);
            return rc;
        }
        j++;
    }

    /* put the data onto the registry */
    if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &value))) {
        ORTE_ERROR_LOG(rc);
    }

    /* cleanup memory */
    OBJ_RELEASE(value);

    return rc;
}
/**
 * Retrieve the mapping plan stored for a job.
 *
 * Queries the job's container on the ORTE_JOBINFO_SEGMENT for the
 * RMAPS-specific attributes and adds each returned entry to the provided
 * list with ORTE_RMGR_ATTR_OVERRIDE, so any matching entry already on the
 * list is OVERWRITTEN.
 *
 * @param job        The job whose stored plan is to be retrieved.
 * @param attr_list  The list to be updated with the retrieved attributes.
 *
 * @retval ORTE_SUCCESS               The list was updated.
 * @retval ORTE_ERR_GPR_DATA_CORRUPT  The registry did not return exactly
 *                                    one value for the job.
 * @retval ORTE_ERROR                 The error code returned by the schema,
 *                                    registry, or add_attribute call that failed.
 */
int orte_rmaps_base_get_mapping_plan(orte_jobid_t job, opal_list_t *attr_list)
{
    int rc;
    orte_gpr_value_t **values = NULL, *value;
    orte_gpr_keyval_t *kval;
    orte_std_cntr_t i, num_vals = 0, num_tokens = 0;
    /* orte_gpr.get is handed this array without a count, so the list
     * has to be NULL-terminated for the end to be detectable
     */
    char *attrs[] = {
        ORTE_RMAPS_MAP_POLICY,
        ORTE_RMAPS_PERNODE,
        ORTE_RMAPS_NO_USE_LOCAL,
        ORTE_RMAPS_NO_OVERSUB,
        ORTE_RMAPS_DESIRED_MAPPER,
        ORTE_RMAPS_USE_PARENT_PLAN,
        ORTE_RMAPS_BOOKMARK,
        NULL
    };
    char **tokens = NULL;

    OPAL_TRACE(2);

    /* setup the tokens to point to this job's container */
    if (ORTE_SUCCESS != (rc = orte_schema.get_job_tokens(&tokens, &num_tokens, job))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* query the mapping plan data from the registry */
    if (ORTE_SUCCESS != (rc = orte_gpr.get(ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
                                           ORTE_JOBINFO_SEGMENT,
                                           tokens, attrs,
                                           &num_vals, &values))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* should only be one value returned here since there is only one
     * container/job on the segment - error otherwise
     */
    if (1 != num_vals) {
        rc = ORTE_ERR_GPR_DATA_CORRUPT;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* update the data on the list. This will OVERWRITE any matching data
     * on that list....USER BEWARE!
     */
    value = values[0];
    for (i=0; i < value->cnt; i++) {
        kval = value->keyvals[i];
        if (NULL != kval->value) {
            rc = orte_rmgr.add_attribute(attr_list, kval->key,
                                         kval->value->type,
                                         kval->value->data,
                                         ORTE_RMGR_ATTR_OVERRIDE);
        } else {
            /* an existence-only attribute may come back without a value
             * object - pass it along as ORTE_UNDEF with no data
             */
            rc = orte_rmgr.add_attribute(attr_list, kval->key,
                                         ORTE_UNDEF, NULL,
                                         ORTE_RMGR_ATTR_OVERRIDE);
        }
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    }

cleanup:
    /* release every returned value and the array that holds them */
    if (NULL != values) {
        for (i=0; i < num_vals; i++) {
            if (NULL != values[i]) OBJ_RELEASE(values[i]);
        }
        free(values);
    }
    /* release the tokens obtained from the schema */
    if (NULL != tokens) {
        for (i=0; i < num_tokens; i++) {
            free(tokens[i]);
        }
        free(tokens);
    }

    return rc;
}
/**
 * Update the bookmark stored for a parent job.
 *
 * Looks up the ORTE_RMAPS_BOOKMARK attribute on the provided list and
 * writes it into the parent job's container on the ORTE_JOBINFO_SEGMENT,
 * overwriting whatever bookmark was stored there.
 *
 * @param parent_job  The job whose stored bookmark is to be updated.
 * @param attrs       The list of attributes holding the new bookmark.
 *
 * @retval ORTE_SUCCESS        The bookmark was stored.
 * @retval ORTE_ERR_NOT_FOUND  No bookmark attribute was on the list.
 * @retval ORTE_ERROR          The error code returned by the registry or
 *                             schema call that failed.
 */
int orte_rmaps_base_update_mapping_state(orte_jobid_t parent_job,
                                         opal_list_t *attrs)
{
    int rc;
    orte_attribute_t *attr;
    orte_gpr_value_t *value;
    orte_std_cntr_t num_tokens;

    OPAL_TRACE(2);

    /* see if the bookmark is present - if not, we report this as an error so
     * that the RMAPS component developer can correct it
     */
    if (NULL == (attr = orte_rmgr.find_attribute(attrs, ORTE_RMAPS_BOOKMARK))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    /* setup to store the bookmark */
    if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value,
                                                    ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
                                                    ORTE_JOBINFO_SEGMENT, 1, 0))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* setup the tokens to point to this job's container */
    if (ORTE_SUCCESS != (rc = orte_schema.get_job_tokens(&(value->tokens), &num_tokens, parent_job))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(value);
        return rc;
    }
    /* the value was created with a token count of zero - record how many
     * tokens were actually attached so the count agrees with the array.
     * NOTE(review): relies on orte_gpr_value_t carrying a num_tokens field
     * next to tokens - confirm against gpr_types.h
     */
    value->num_tokens = num_tokens;

    /* copy the data that is to be stored */
    if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), attr->key,
                                                     attr->value->type, attr->value->data))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(value);
        return rc;
    }

    /* put the data onto the registry */
    if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &value))) {
        ORTE_ERROR_LOG(rc);
    }

    /* cleanup memory */
    OBJ_RELEASE(value);

    return rc;
}

Просмотреть файл

@ -110,6 +110,35 @@ ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_c
ORTE_DECLSPEC int orte_rmaps_base_put_job_map(orte_job_map_t *map);
/*
* Store a mapping plan
* Given a list of attributes, this function stores all the RMAPS-specific
* attributes on the registry for later use - e.g., by a child job that
* wants to be mapped in a fashion identical to that of its parent
*/
int orte_rmaps_base_store_mapping_plan(orte_jobid_t job, opal_list_t *attrs);
/*
* Get a mapping plan
* Given a jobid, retrieve the stored mapping plan for that job. The
* RMAPS-specific attributes will UPDATE the provided list to avoid
* the possibility of duplicate list entries. Any existing RMAPS-specific
* entries on the provided list will, therefore, be OVERWRITTEN.
*/
int orte_rmaps_base_get_mapping_plan(orte_jobid_t job, opal_list_t *attrs);
/*
* Update the mapping state
* Dynamically spawned child jobs that share resources with their parent
* need to know where the parent job stopped mapping so they can pickup
* from the right place. Once the child is mapped, however, we need to update
* that info for the *parent* so that any additional children can have the
* right info.
*/
int orte_rmaps_base_update_mapping_state(orte_jobid_t parent_job,
opal_list_t *attrs);
/*
* communication functions
*/

Просмотреть файл

@ -39,6 +39,8 @@ extern "C" {
#define ORTE_RMAPS_NO_USE_LOCAL "orte-map-no-use-local"
#define ORTE_RMAPS_NO_OVERSUB "orte-map-no-oversubscribe"
#define ORTE_RMAPS_DESIRED_MAPPER "orte-map-desired"
#define ORTE_RMAPS_USE_PARENT_PLAN "orte-map-use-parent-plan"
#define ORTE_RMAPS_BOOKMARK "orte-map-bookmark"
/**** JOB_MAP OBJECTS ***/
/*

Просмотреть файл

@ -35,6 +35,7 @@
#include "opal/util/show_help.h"
#include "opal/util/argv.h"
#include "orte/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/gpr/gpr.h"
@ -288,6 +289,8 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes)
orte_std_cntr_t num_procs = 0, total_num_slots, mapped_num_slots;
int rc;
bool modify_app_context = false;
char *sptr;
orte_attribute_t *attr;
OPAL_TRACE(1);
@ -326,31 +329,33 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid, opal_list_t *attributes)
return rc;
}
/* initialize the cur_node_item to point to the first node in the list that has
* an available slot. We need to check the slot availability since we may be
* mapping a child job onto the same nodes used by its parent. In that case,
* even though we may have used some slots on a node, the system still considers
* the node available due to oversubscription rules. However, we don't want to
* start at the beginning of the nodelist again as we will be oversubscribing the
* node and causing majorly poor performance
*/
for (cur_node_item = opal_list_get_first(&master_node_list);
cur_node_item != opal_list_get_end(&master_node_list);
cur_node_item = opal_list_get_next(cur_node_item)) {
#if 0
node = (orte_ras_node_t*)cur_node_item;
if (node->node_slots > node->node_slots_inuse) {
goto MOVEON;
/* if a bookmark exists from some prior mapping, set us to start there */
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMAPS_BOOKMARK))) {
cur_node_item = NULL;
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, attr->value, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
#endif
goto MOVEON; /* until I get this fixed...this runs slow, but doesn't hang */
/* find this node on the master list - the bookmark holds a node name, so
 * we want the entry whose name MATCHES it (strcmp returns zero on a match)
 */
for (item = opal_list_get_first(&master_node_list);
     item != opal_list_get_end(&master_node_list);
     item = opal_list_get_next(item)) {
    node = (orte_ras_node_t*)item;
    if (0 == strcmp(sptr, node->node_name)) {
        cur_node_item = item;
        break;
    }
}
/* see if we found it - if not, just start at the beginning */
if (NULL == cur_node_item) {
cur_node_item = opal_list_get_first(&master_node_list);
}
} else {
/* if no bookmark, then just start at the beginning of the list */
cur_node_item = opal_list_get_first(&master_node_list);
}
/* if we got here, then everyone is at or above the soft limit - just
* start with the first node on the list
*/
cur_node_item = opal_list_get_first(&master_node_list);
MOVEON:
/** construct the list to hold any nodes that get fully used during this
* mapping. We need to keep a record of these so we can update their
* information on the registry when we are done, but we want to remove
@ -561,9 +566,19 @@ MOVEON:
if (modify_app_context) {
if (ORTE_SUCCESS != (rc = orte_rmgr.store_app_context(jobid, map->apps, 1))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
}
/* save a bookmark indicating what node we finished with so that subsequent children (if any)
* can start at the right place
*/
node = (orte_ras_node_t*)cur_node_item;
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(attributes, ORTE_RMAPS_BOOKMARK,
ORTE_STRING, node->node_name,
ORTE_RMGR_ATTR_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
}
cleanup:
while(NULL != (item = opal_list_remove_first(&master_node_list))) {

Просмотреть файл

@ -62,11 +62,32 @@ orte_attribute_t* orte_rmgr_base_find_attribute(opal_list_t* attr_list, char* ke
* ADD ATTRIBUTE
*/
int orte_rmgr_base_add_attribute(opal_list_t* attr_list, char* key,
orte_data_type_t type, void *data)
orte_data_type_t type, void *data,
bool overwrite)
{
int rc;
orte_attribute_t *kval;
orte_gpr_keyval_t *kval;
orte_attribute_t *attr;
/* see if this attribute is already present */
if (NULL != (attr = orte_rmgr_base_find_attribute(attr_list, key))) {
/** found it - do we want to replace this value? */
if (overwrite) {
/* yes - remove the existing value, we will add
* the new value down below
*/
opal_list_remove_item(attr_list, (opal_list_item_t*)attr);
OBJ_RELEASE(attr);
goto ADD_ITEM;
}
/* don't overwrite, so just return - it is okay for us NOT to update if
* overwrite is set to "no"
*/
return ORTE_SUCCESS;
}
ADD_ITEM:
/** didn't find it or replacing the old one - add the attribute */
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&kval, key, type, data))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -77,36 +98,33 @@ int orte_rmgr_base_add_attribute(opal_list_t* attr_list, char* key,
return ORTE_SUCCESS;
}
/*
* UPDATE ATTRIBUTE
* MERGE ATTRIBUTES
*/
int orte_rmgr_base_update_attribute(opal_list_t* attr_list, char* key,
orte_data_type_t type, void *data)
int orte_rmgr_base_merge_attributes(opal_list_t* target, opal_list_t* source, bool override)
{
opal_list_item_t *item;
orte_attribute_t *kval;
int rc;
opal_list_item_t *item;
orte_attribute_t *attr;
for (item = opal_list_get_first(attr_list);
item != opal_list_get_end(attr_list);
/* Since the add_attribute function takes care of the override issue, we just
* need to cycle through the source list and "add" everything to the target
*/
for (item = opal_list_get_first(source);
item != opal_list_get_end(source);
item = opal_list_get_next(item)) {
kval = (orte_attribute_t*)item;
if (strcmp(key, kval->key) == 0) {
/** found it - replace the value by releasing
* this item and replacing it with a new one
*/
opal_list_remove_item(attr_list, item);
OBJ_RELEASE(item);
goto ADD_ITEM;
attr = (orte_attribute_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmgr_base_add_attribute(target, attr->key,
attr->value->type,
attr->value->data,
override))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
ADD_ITEM:
/** didn't find it or replacing the old one - add the attribute */
if (ORTE_SUCCESS != (rc = orte_rmgr_base_add_attribute(attr_list, key, type, data))) {
ORTE_ERROR_LOG(rc);
}
return rc;
return ORTE_SUCCESS;
}

Просмотреть файл

@ -59,7 +59,7 @@ orte_rmgr_base_module_t orte_rmgr = {
/** SUPPORT FUNCTIONS ***/
orte_rmgr_base_find_attribute,
orte_rmgr_base_add_attribute,
orte_rmgr_base_update_attribute,
orte_rmgr_base_merge_attributes,
orte_rmgr_base_delete_attribute,
orte_rmgr_base_get_app_context,
orte_rmgr_base_put_app_context,

Просмотреть файл

@ -55,9 +55,8 @@ extern "C" {
typedef uint8_t orte_rmgr_cmd_t;
/*
* Internal definitions
* Base functions that are common to all implementations - can be overridden
*/
ORTE_DECLSPEC int orte_rmgr_base_get_app_context(
orte_jobid_t jobid,
orte_app_context_t*** app_context,
@ -85,10 +84,27 @@ ORTE_DECLSPEC int orte_rmgr_base_set_vpid_range(orte_jobid_t jobid, orte_vpid_t
ORTE_DECLSPEC int orte_rmgr_base_get_vpid_range(orte_jobid_t jobid, orte_vpid_t *start, orte_vpid_t *range);
ORTE_DECLSPEC int orte_rmgr_base_connect(orte_std_cntr_t num_connect,
orte_process_name_t *connect);
ORTE_DECLSPEC int orte_rmgr_base_disconnect(orte_std_cntr_t num_disconnect,
orte_process_name_t *disconnect);
orte_gpr_keyval_t* orte_rmgr_base_find_attribute(opal_list_t* attr_list, char* key);
int orte_rmgr_base_add_attribute(opal_list_t* attr_list, char* key,
orte_data_type_t type, void *data,
bool overwrite);
int orte_rmgr_base_merge_attributes(opal_list_t* target, opal_list_t* source, bool override);
int orte_rmgr_base_delete_attribute(opal_list_t* attr_list, char* key);
/*
* Base functions that are common to all implementations - can be overridden
* Internal definitions
*/
int orte_rmgr_base_create_not_available(
orte_app_context_t** app_context,
orte_std_cntr_t num_context,
@ -104,22 +120,6 @@ int orte_rmgr_base_spawn_not_available(
orte_proc_state_t cb_conditions,
opal_list_t *attributes);
ORTE_DECLSPEC int orte_rmgr_base_connect(orte_std_cntr_t num_connect,
orte_process_name_t *connect);
ORTE_DECLSPEC int orte_rmgr_base_disconnect(orte_std_cntr_t num_disconnect,
orte_process_name_t *disconnect);
orte_gpr_keyval_t* orte_rmgr_base_find_attribute(opal_list_t* attr_list, char* key);
int orte_rmgr_base_add_attribute(opal_list_t* attr_list, char* key,
orte_data_type_t type, void *data);
int orte_rmgr_base_update_attribute(opal_list_t* attr_list, char* key,
orte_data_type_t type, void *data);
int orte_rmgr_base_delete_attribute(opal_list_t* attr_list, char* key);
int orte_rmgr_base_finalize_not_available(void);
/*

Просмотреть файл

@ -83,6 +83,15 @@ static int orte_rmgr_cnos_get_vpid_range(orte_jobid_t jobid,
orte_vpid_t *start,
orte_vpid_t *range);
static orte_gpr_keyval_t* orte_rmgr_cnos_find_attribute(opal_list_t* attr_list, char* key);
static int orte_rmgr_cnos_add_attribute(opal_list_t* attr_list, char* key,
orte_data_type_t type, void *data, bool overwrite);
static int orte_rmgr_cnos_merge_attributes(opal_list_t* target, opal_list_t* source, bool override);
static int orte_rmgr_cnos_delete_attribute(opal_list_t* attr_list, char* key);
orte_rmgr_base_module_t orte_rmgr_cnos_module = {
NULL, /* don't need special init */
orte_rmgr_cnos_setup_job,
@ -91,6 +100,10 @@ orte_rmgr_base_module_t orte_rmgr_cnos_module = {
orte_rmgr_cnos_disconnect,
orte_rmgr_cnos_finalize,
/** SUPPORT FUNCTIONS ***/
orte_rmgr_cnos_find_attribute,
orte_rmgr_cnos_add_attribute,
orte_rmgr_cnos_merge_attributes,
orte_rmgr_cnos_delete_attribute,
orte_rmgr_cnos_get_app_context,
orte_rmgr_cnos_put_app_context,
orte_rmgr_cnos_check_context_cwd,
@ -185,3 +198,25 @@ static int orte_rmgr_cnos_get_vpid_range(orte_jobid_t jobid,
{
return ORTE_ERR_NOT_SUPPORTED;
}
/* Attribute lookup is not supported on CNOS. The function returns a pointer,
 * so "nothing found" is reported as NULL rather than as an integer error code.
 */
static orte_gpr_keyval_t* orte_rmgr_cnos_find_attribute(opal_list_t* attr_list, char* key)
{
    return NULL;
}
/* Adding attributes is not supported on CNOS. The parameter list includes
 * the overwrite flag so the definition agrees with the prototype declared
 * earlier in this file.
 */
static int orte_rmgr_cnos_add_attribute(opal_list_t* attr_list, char* key,
                                        orte_data_type_t type, void *data,
                                        bool overwrite)
{
    return ORTE_ERR_NOT_SUPPORTED;
}
/* Merging attribute lists is not supported on CNOS - always reports
 * ORTE_ERR_NOT_SUPPORTED without looking at either list.
 */
static int orte_rmgr_cnos_merge_attributes(opal_list_t* target, opal_list_t* source, bool override)
{
    (void)target;
    (void)source;
    (void)override;

    return ORTE_ERR_NOT_SUPPORTED;
}
/* Deleting attributes is not supported on CNOS - always reports
 * ORTE_ERR_NOT_SUPPORTED without looking at the list or the key.
 */
static int orte_rmgr_cnos_delete_attribute(opal_list_t* attr_list, char* key)
{
    (void)attr_list;
    (void)key;

    return ORTE_ERR_NOT_SUPPORTED;
}

Просмотреть файл

@ -68,7 +68,7 @@ orte_rmgr_base_module_t orte_rmgr_proxy_module = {
/** SUPPORT FUNCTIONS ***/
orte_rmgr_base_find_attribute,
orte_rmgr_base_add_attribute,
orte_rmgr_base_update_attribute,
orte_rmgr_base_merge_attributes,
orte_rmgr_base_delete_attribute,
orte_rmgr_base_get_app_context,
orte_rmgr_base_put_app_context,

Просмотреть файл

@ -139,7 +139,7 @@ typedef int (*orte_rmgr_base_module_finalize_fn_t)(void);
/**
* Find an attribute
* Given a pointer array of attributes, return a pointer to the specified attribute
* Given a list of attributes, return a pointer to the specified attribute
*
* @param attr_list A pointer to the list of attributes
* @param key The key indicating the attribute to be returned.
@ -153,9 +153,13 @@ typedef orte_attribute_t* (*orte_rmgr_base_module_find_attribute_fn_t)(opal_list
/**
* Add an attribute
* Given a pointer array of attributes and the data for a new attribute,
* Given a list of attributes and the data for a new attribute,
* this function will create the gpr_keyval_t object for that attribute,
* populate it with the provided data, and append it to the list.
* populate it with the provided data, and append it to the list. If
* overwrite is set to true AND the value is found on the list, then
* it will be overwritten with the new value. If overwrite is NOT set
* and the value is found on the list, it will be left alone - the value
* will NOT be updated with the one provided.
*
* @param attr_list A pointer to the list of attributes
* @param key The key for the attribute.
@ -164,19 +168,22 @@ typedef orte_attribute_t* (*orte_rmgr_base_module_find_attribute_fn_t)(opal_list
* the existence of the attribute on the list is all that is required.
* @param data A pointer to the data to be stored in the attribute. NULL
* is acceptable IF the data type is ORTE_UNDEF.
* @param overwrite Indicates if a pre-existing value can be overwritten or not
* @retval ORTE_SUCCESS Attribute was added to list.
* @retval ORTE_ERROR An appropriate error code indicating what went wrong.
*/
typedef int (*orte_rmgr_base_module_add_attribute_fn_t)(opal_list_t* attr_list, char* key,
orte_data_type_t type, void *data);
orte_data_type_t type, void *data,
bool overwrite);
/**
* Update an attribute
* Given a pointer array of attributes and the data for a new attribute,
* this function will find the attribute matching the given key and
* replace the current value with the one given. If the attribute is NOT
* found on the list, it will be added to it.
* Merge two attribute lists
* Given two lists of attributes, this function will merge the second list into
* the first. The boolean defines how to handle matching entries - if set to
* true (ORTE_RMGR_ATTR_OVERRIDE), entries in the second list will OVERWRITE
* matching entries in the first list. If set to false (ORTE_RMGR_ATTR_NO_OVERRIDE)
* matching entries in the second list will be ignored.
*
* @param attr_list A pointer to the list of attributes
* @param key The key for the attribute.
@ -188,8 +195,9 @@ typedef int (*orte_rmgr_base_module_add_attribute_fn_t)(opal_list_t* attr_list,
* @retval ORTE_SUCCESS Attribute was added to list.
* @retval ORTE_ERROR An appropriate error code indicating what went wrong.
*/
typedef int (*orte_rmgr_base_module_update_attribute_fn_t)(opal_list_t* attr_list, char* key,
orte_data_type_t type, void *data);
typedef int (*orte_rmgr_base_module_merge_attributes_fn_t)(opal_list_t* target,
opal_list_t* source,
bool override);
/**
@ -266,7 +274,7 @@ struct orte_rmgr_base_module_2_0_0_t {
/** SUPPORT FUNCTIONS ***/
orte_rmgr_base_module_find_attribute_fn_t find_attribute;
orte_rmgr_base_module_add_attribute_fn_t add_attribute;
orte_rmgr_base_module_update_attribute_fn_t update_attribute;
orte_rmgr_base_module_merge_attributes_fn_t merge_attributes;
orte_rmgr_base_module_delete_attribute_fn_t delete_attribute;
orte_rmgr_base_module_get_app_context_fn_t get_app_context;
orte_rmgr_base_module_store_app_context_fn_t store_app_context;

Просмотреть файл

@ -39,7 +39,13 @@ extern "C" {
*/
typedef orte_gpr_keyval_t orte_attribute_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_attribute_t);
/* define some booleans to make code more legible. These
 * control how the rmgr.add_attribute and rmgr.merge_attributes
 * functions treat an entry that is already on the list
 */
#define ORTE_RMGR_ATTR_NO_OVERRIDE false
#define ORTE_RMGR_ATTR_OVERRIDE true
/* RESOURCE MANAGER DATA TYPES */

Просмотреть файл

@ -77,7 +77,7 @@ orte_rmgr_base_module_t orte_rmgr_urm_module = {
/** SUPPORT FUNCTIONS ***/
orte_rmgr_base_find_attribute,
orte_rmgr_base_add_attribute,
orte_rmgr_base_update_attribute,
orte_rmgr_base_merge_attributes,
orte_rmgr_base_delete_attribute,
orte_rmgr_base_get_app_context,
orte_rmgr_base_put_app_context,