1
1
openmpi/orte/mca/ras/base/ras_base_node.c
2006-04-27 17:31:01 +00:00

743 строки
23 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <string.h>
#include "orte/orte_constants.h"
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/soh/soh_types.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/ras/base/ras_base_node.h"
static void orte_ras_base_node_construct(orte_ras_node_t* node)
{
node->node_name = NULL;
node->node_arch = NULL;
node->node_cellid = 0;
node->node_state = ORTE_NODE_STATE_UNKNOWN;
node->node_slots = 0;
node->node_slots_inuse = 0;
node->node_slots_alloc = 0;
node->node_slots_max = 0;
node->node_username = NULL;
node->node_launched = 0;
}
static void orte_ras_base_node_destruct(orte_ras_node_t* node)
{
if (NULL != node->node_name) {
free(node->node_name);
}
if (NULL != node->node_arch) {
free(node->node_arch);
}
if (NULL != node->node_username) {
free(node->node_username);
}
}
OBJ_CLASS_INSTANCE(
orte_ras_node_t,
opal_list_item_t,
orte_ras_base_node_construct,
orte_ras_base_node_destruct);
/*
* Query the registry for all available nodes
*/
int orte_ras_base_node_query(opal_list_t* nodes)
{
size_t i, cnt, *sptr;
orte_node_state_t *nsptr;
orte_cellid_t *cptr;
orte_gpr_value_t** values;
int rc;
/* query all node entries */
rc = orte_gpr.get(
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
ORTE_NODE_SEGMENT,
NULL,
NULL,
&cnt,
&values);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* parse the response */
for(i=0; i<cnt; i++) {
orte_gpr_value_t* value = values[i];
orte_ras_node_t* node = OBJ_NEW(orte_ras_node_t);
size_t k;
for(k=0; k<value->cnt; k++) {
orte_gpr_keyval_t* keyval = value->keyvals[k];
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
/* we use the dss.copy function here instead of strdup because that function
* automatically protects us against a NULL (or zero-length) string
*/
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(node->node_name), keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_ARCH_KEY) == 0) {
/* we use the dss.copy function here instead of strdup because that function
* automatically protects us against a NULL (or zero-length) string
*/
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(node->node_arch), keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_STATE_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&nsptr, keyval->value, ORTE_NODE_STATE))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_state = *nsptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_SLOTS_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_SIZE))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_slots = *sptr;
continue;
}
if(strncmp(keyval->key, ORTE_NODE_SLOTS_ALLOC_KEY, strlen(ORTE_NODE_SLOTS_ALLOC_KEY)) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_SIZE))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_slots_inuse += *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_SLOTS_MAX_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_SIZE))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_slots_max = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_USERNAME_KEY) == 0) {
/* we use the dss.copy function here instead of strdup because that function
* automatically protects us against a NULL (or zero-length) string
*/
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(node->node_username), keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
continue;
}
if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, keyval->value, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_cellid = *cptr;
continue;
}
}
opal_list_append(nodes, &node->super);
OBJ_RELEASE(value);
}
if (NULL != values) free(values);
return ORTE_SUCCESS;
}
/*
* Query the registry for all nodes allocated to a specified job
*/
int orte_ras_base_node_query_alloc(opal_list_t* nodes, orte_jobid_t jobid)
{
char* keys[] = {
ORTE_NODE_NAME_KEY,
ORTE_NODE_ARCH_KEY,
ORTE_NODE_STATE_KEY,
ORTE_NODE_SLOTS_KEY,
ORTE_NODE_SLOTS_ALLOC_KEY,
ORTE_NODE_SLOTS_MAX_KEY,
ORTE_NODE_USERNAME_KEY,
ORTE_CELLID_KEY,
NULL
};
size_t i, cnt, keys_len;
orte_gpr_value_t** values;
char* jobid_str;
size_t *sptr;
orte_node_state_t *nsptr;
orte_cellid_t *cptr;
int rc;
if(ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_str, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
asprintf(&keys[4], "%s-%s", ORTE_NODE_SLOTS_ALLOC_KEY, jobid_str);
keys_len = strlen(keys[4]);
free(jobid_str);
/* query selected node entries */
rc = orte_gpr.get(
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
ORTE_NODE_SEGMENT,
NULL,
keys,
&cnt,
&values);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* parse the response */
for(i=0; i<cnt; i++) {
orte_gpr_value_t* value = values[i];
orte_ras_node_t* node;
size_t k;
bool found;
found = false;
for (k = 0; k < value->cnt; k++) {
orte_gpr_keyval_t* keyval = value->keyvals[k];
if(0 == strcmp(keyval->key, keys[4])) {
found = true;
break;
}
}
if (!found)
continue;
node = OBJ_NEW(orte_ras_node_t);
for(k=0; k < value->cnt; k++) {
orte_gpr_keyval_t* keyval = value->keyvals[k];
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
/* we use the dss.copy function here instead of strdup because that function
* automatically protects us against a NULL (or zero-length) string
*/
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(node->node_name), keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_ARCH_KEY) == 0) {
/* we use the dss.copy function here instead of strdup because that function
* automatically protects us against a NULL (or zero-length) string
*/
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(node->node_arch), keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_STATE_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&nsptr, keyval->value, ORTE_NODE_STATE))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_state = *nsptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_SLOTS_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_SIZE))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_slots = *sptr;
continue;
}
if(strncmp(keyval->key, keys[4], keys_len) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_SIZE))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_slots_inuse += *sptr;
node->node_slots_alloc += *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_SLOTS_MAX_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_SIZE))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_slots_max = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_USERNAME_KEY) == 0) {
/* we use the dss.copy function here instead of strdup because that function
* automatically protects us against a NULL (or zero-length) string
*/
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(node->node_username), keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
continue;
}
if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, keyval->value, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_cellid = *cptr;
continue;
}
}
/* in case we get back more than we asked for */
if(node->node_slots_inuse == 0) {
OBJ_RELEASE(node);
} else {
opal_list_append(nodes, &node->super);
}
OBJ_RELEASE(value);
}
free (keys[4]);
if (NULL != values) free(values);
return ORTE_SUCCESS;
}
/*
* Query the registry for a specific node
*/
orte_ras_node_t* orte_ras_base_node_lookup(orte_cellid_t cellid, const char* node_name)
{
orte_ras_node_t* node = NULL;
size_t i, cnt, num_tokens;
size_t *sptr;
orte_cellid_t *cptr;
orte_node_state_t *nsptr;
orte_gpr_value_t** values;
char** tokens = NULL;
int rc;
rc = orte_schema.get_node_tokens(&tokens, &num_tokens, cellid, (char*)node_name);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return NULL;
}
/* query specific entry */
rc = orte_gpr.get(
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
ORTE_NODE_SEGMENT,
tokens,
NULL,
&cnt,
&values);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return NULL;
}
/* parse the response */
for(i=0; i<cnt; i++) {
orte_gpr_value_t* value = values[i];
size_t k;
node = OBJ_NEW(orte_ras_node_t);
for(k=0; k<value->cnt; k++) {
orte_gpr_keyval_t* keyval = value->keyvals[k];
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
/* we use the dss.copy function here instead of strdup because that function
* automatically protects us against a NULL (or zero-length) string
*/
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(node->node_name), keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_ARCH_KEY) == 0) {
/* we use the dss.copy function here instead of strdup because that function
* automatically protects us against a NULL (or zero-length) string
*/
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(node->node_arch), keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_STATE_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&nsptr, keyval->value, ORTE_NODE_STATE))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_state = *nsptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_SLOTS_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_SIZE))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_slots = *sptr;
continue;
}
if(strncmp(keyval->key, ORTE_NODE_SLOTS_ALLOC_KEY, strlen(ORTE_NODE_SLOTS_ALLOC_KEY)) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_SIZE))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_slots_inuse += *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_SLOTS_MAX_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_SIZE))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_slots_max = *sptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_USERNAME_KEY) == 0) {
/* we use the dss.copy function here instead of strdup because that function
* automatically protects us against a NULL (or zero-length) string
*/
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(node->node_username), keyval->value->data, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
continue;
}
if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, keyval->value, ORTE_CELLID))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->node_cellid = *cptr;
continue;
}
}
OBJ_RELEASE(values[i]);
break;
}
for(i=1; i<cnt; i++)
OBJ_RELEASE(values[i]);
if (NULL != values) free(values);
opal_argv_free(tokens);
return node;
}
/*
* Add the specified node definitions to the registry
*/
int orte_ras_base_node_insert(opal_list_t* nodes)
{
opal_list_item_t* item;
orte_gpr_value_t **values;
int rc;
size_t num_values, i, j;
char *keys[] = {
ORTE_NODE_NAME_KEY,
ORTE_NODE_ARCH_KEY,
ORTE_NODE_STATE_KEY,
ORTE_CELLID_KEY,
ORTE_NODE_SLOTS_KEY,
ORTE_NODE_SLOTS_MAX_KEY,
ORTE_NODE_USERNAME_KEY
};
orte_data_type_t types[] = {
ORTE_STRING,
ORTE_STRING,
ORTE_NODE_STATE,
ORTE_CELLID,
ORTE_SIZE,
ORTE_SIZE,
ORTE_STRING
};
orte_ras_node_t* node;
num_values = opal_list_get_size(nodes);
if (0 >= num_values) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
values = (orte_gpr_value_t**)malloc(num_values * sizeof(orte_gpr_value_t*));
if (NULL == values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
for (i=0; i < num_values; i++) {
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_AND,
ORTE_NODE_SEGMENT, 7, 0))) {
ORTE_ERROR_LOG(rc);
for (j=0; j < i; j++) {
OBJ_RELEASE(values[j]);
}
free(values);
return rc;
}
}
for(i=0, item = opal_list_get_first(nodes);
i < num_values && item != opal_list_get_end(nodes);
i++, item = opal_list_get_next(item)) {
node = (orte_ras_node_t*)item;
j = 0;
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[j]), keys[j], types[j], node->node_name))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
++j;
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[j]), keys[j], types[j], node->node_arch))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
++j;
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[j]), keys[j], types[j], &(node->node_state)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
++j;
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[j]), keys[j], types[j], &(node->node_cellid)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
++j;
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[j]), keys[j], types[j], &(node->node_slots)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
++j;
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[j]), keys[j], types[j], &(node->node_slots_max)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
++j;
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[j]), keys[j], types[j], node->node_username))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* setup index/keys for this node */
rc = orte_schema.get_node_tokens(&(values[i]->tokens), &(values[i]->num_tokens), node->node_cellid, node->node_name);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
}
/* try the insert */
if (ORTE_SUCCESS != (rc = orte_gpr.put(num_values, values))) {
ORTE_ERROR_LOG(rc);
}
cleanup:
for (j=0; j < num_values; j++) {
OBJ_RELEASE(values[j]);
}
if (NULL != values) free(values);
return rc;
}
/*
* Delete the specified nodes from the registry
*/
int orte_ras_base_node_delete(opal_list_t* nodes)
{
opal_list_item_t* item;
int rc;
size_t i, num_values, num_tokens;
orte_ras_node_t* node;
char** tokens;
num_values = opal_list_get_size(nodes);
if (0 >= num_values) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
for(item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes);
item = opal_list_get_next(item)) {
node = (orte_ras_node_t*)item;
/* setup index/keys for this node */
rc = orte_schema.get_node_tokens(&tokens, &num_tokens, node->node_cellid, node->node_name);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_gpr.delete_entries(
ORTE_GPR_TOKENS_AND,
ORTE_NODE_SEGMENT,
tokens,
NULL);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
for (i=0; i < num_tokens; i++) {
free(tokens[i]);
tokens[i] = NULL;
}
if (NULL != tokens) free(tokens);
}
return ORTE_SUCCESS;
}
/*
* Assign the allocated slots on the specified nodes to the
* indicated jobid.
*/
int orte_ras_base_node_assign(opal_list_t* nodes, orte_jobid_t jobid)
{
opal_list_item_t* item;
orte_gpr_value_t **values;
int rc;
size_t num_values, i, j;
orte_ras_node_t* node;
char* jobid_str, *key;
num_values = opal_list_get_size(nodes);
if (0 >= num_values) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
values = (orte_gpr_value_t**)malloc(num_values * sizeof(orte_gpr_value_t*));
if (NULL == values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
for (i=0; i < num_values; i++) {
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]), ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_AND,
ORTE_NODE_SEGMENT, 1, 0))) {
ORTE_ERROR_LOG(rc);
for (j=0; j < i; j++) {
OBJ_RELEASE(values[j]);
}
if (NULL != values) free(values);
return rc;
}
}
for(i=0, item = opal_list_get_first(nodes);
i < num_values && item != opal_list_get_end(nodes);
i++, item = opal_list_get_next(item)) {
node = (orte_ras_node_t*)item;
if(node->node_slots_alloc == 0)
continue;
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_str, jobid))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* setup index/keys for this node */
rc = orte_schema.get_node_tokens(&(values[i]->tokens), &(values[i]->num_tokens), node->node_cellid, node->node_name);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
free(jobid_str);
goto cleanup;
}
/* setup node key/value pairs */
asprintf(&key, "%s-%s", ORTE_NODE_SLOTS_ALLOC_KEY, jobid_str);
free(jobid_str);
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[0]), key, ORTE_SIZE, &(node->node_slots_alloc)))) {
ORTE_ERROR_LOG(rc);
free(key);
goto cleanup;
}
free(key);
}
/* try the insert */
if (ORTE_SUCCESS != (rc = orte_gpr.put(num_values, values))) {
ORTE_ERROR_LOG(rc);
}
cleanup:
for (j=0; j < num_values; j++) {
OBJ_RELEASE(values[j]);
}
if (NULL != values) free(values);
return rc;
}
int orte_ras_base_node_segment_empty(bool *empty)
{
int ret;
opal_list_t nodes;
opal_list_item_t *item;
/* See what's already on the node segment */
OBJ_CONSTRUCT(&nodes, opal_list_t);
if (ORTE_SUCCESS != (ret = orte_ras_base_node_query(&nodes))) {
ORTE_ERROR_LOG(ret);
OBJ_DESTRUCT(&nodes);
return ret;
}
*empty = opal_list_is_empty(&nodes) ? true : false;
/* Free the list */
while (NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
/* All done */
return ORTE_SUCCESS;
}