1
1
openmpi/orte/runtime/orte_globals_class_instances.h
Ralph Castain a1d296ae03 This commit fixes ticket #1410
Fix a few bugs in the mappers:

1. Ensure that bynode with no -np fills all available slots - it just does so with the ranks set bynode instead of byslot

2. fix --nolocal behavior so it works correctly in all cases. We still have to test the host's name using opal_ifislocal in the mapper because the name returned by gethostname to orte_process_info.hostname can be an FQDN, but a hostfile may contain a non-FQDN version.

3. Add missing --nolocal logic to the seq mapper

Oversubscribed mapping seemed to be working okay without repair, so I couldn't verify my own bug report in that regard.

Also included are some preliminary changes to support the modified hostfile behavior, which will be committed shortly:

1. removed the totally useless "allocate" field in the orte_node_t object since every node is automatically allocated for use - and everything ignored the field anyway

2. correctly initialize the slots_alloc field when the allocation is read

This commit was SVN r19030.
2008-07-25 13:35:12 +00:00

376 lines
9.5 KiB
C

/* -*- C -*-
*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2008 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
/** @file
* Constructors, destructors, and class instantiations for the ORTE
* runtime object classes.
*/
#ifndef ORTE_RUNTIME_GLOBAL_CLASS_INSTANCES_H_
#define ORTE_RUNTIME_GLOBAL_CLASS_INSTANCES_H_
#include "orte_config.h"
#include "orte/types.h"
#include "opal/util/argv.h"
#include "orte/mca/plm/plm_types.h"
#include "orte/runtime/orte_globals.h"
BEGIN_C_DECLS
/*
* CONSTRUCTORS, DESTRUCTORS, AND CLASS INSTANTIATIONS
* FOR ORTE CLASSES
*/
/*
 * Constructor for orte_app_context_t.
 *
 * Puts every field into its empty state: counters at zero, pointers
 * at NULL, and boolean flags cleared.
 */
static void orte_app_context_construct(orte_app_context_t* app_context)
{
    /* index and process count */
    app_context->idx       = 0;
    app_context->num_procs = 0;

    /* executable, argument vector, environment */
    app_context->app  = NULL;
    app_context->argv = NULL;
    app_context->env  = NULL;

    /* working directory */
    app_context->cwd                = NULL;
    app_context->user_specified_cwd = false;

    /* host selection */
    app_context->hostfile     = NULL;
    app_context->add_hostfile = NULL;
    app_context->dash_host    = NULL;

    /* installation prefix */
    app_context->prefix_dir = NULL;

    /* preload settings */
    app_context->preload_binary         = false;
    app_context->preload_files          = NULL;
    app_context->preload_files_dest_dir = NULL;
}
/*
 * Destructor for orte_app_context_t.
 *
 * Releases every non-NULL member. argv, env and dash_host go through
 * opal_argv_free(); the remaining pointer members are released with
 * free(). The NULL checks are kept explicit on purpose.
 */
static void orte_app_context_destructor(orte_app_context_t* app_context)
{
    /* plain heap strings */
    if (app_context->app != NULL) {
        free(app_context->app);
    }
    if (app_context->cwd != NULL) {
        free(app_context->cwd);
    }
    if (app_context->hostfile != NULL) {
        free(app_context->hostfile);
    }
    if (app_context->add_hostfile != NULL) {
        free(app_context->add_hostfile);
    }
    if (app_context->prefix_dir != NULL) {
        free(app_context->prefix_dir);
    }

    /* argv-style lists (argv and env are created by the util/argv
     * copy functions) */
    if (app_context->argv != NULL) {
        opal_argv_free(app_context->argv);
    }
    if (app_context->env != NULL) {
        opal_argv_free(app_context->env);
    }
    if (app_context->dash_host != NULL) {
        opal_argv_free(app_context->dash_host);
    }

    /* preload settings */
    app_context->preload_binary = false;
    if (app_context->preload_files != NULL) {
        free(app_context->preload_files);
    }
    if (app_context->preload_files_dest_dir != NULL) {
        free(app_context->preload_files_dest_dir);
    }
}
/* Class instantiation for orte_app_context_t: parent class opal_object_t,
 * constructor orte_app_context_construct, destructor
 * orte_app_context_destructor. */
OBJ_CLASS_INSTANCE(orte_app_context_t,
opal_object_t,
orte_app_context_construct,
orte_app_context_destructor);
/*
 * Constructor for orte_job_t.
 *
 * Allocates the two pointer arrays held by the job (apps and procs)
 * and sets all other fields to their initial values. The results of
 * OBJ_NEW / opal_pointer_array_init are not checked.
 */
static void orte_job_construct(orte_job_t* job)
{
    job->jobid = ORTE_JOBID_INVALID;

    /* application contexts: array initialised with the arguments
     * (1, ORTE_GLOBAL_ARRAY_MAX_SIZE, 2) */
    job->apps = OBJ_NEW(opal_pointer_array_t);
    opal_pointer_array_init(job->apps, 1, ORTE_GLOBAL_ARRAY_MAX_SIZE, 2);
    job->num_apps = 0;

    /* processes: array initialised with the global block size for
     * both the first and the last argument */
    job->procs = OBJ_NEW(opal_pointer_array_t);
    opal_pointer_array_init(job->procs,
                            ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
                            ORTE_GLOBAL_ARRAY_MAX_SIZE,
                            ORTE_GLOBAL_ARRAY_BLOCK_SIZE);
    job->num_procs = 0;

    /* control flags and allocation */
    job->controls               = 0;
    job->total_slots_alloc      = 0;
    job->oversubscribe_override = false;

    /* mapping */
    job->map      = NULL;
    job->bookmark = NULL;

    /* state tracking */
    job->state          = ORTE_JOB_STATE_UNDEF;
    job->num_launched   = 0;
    job->num_reported   = 0;
    job->num_terminated = 0;
    job->abort          = false;
    job->aborted_proc   = NULL;

#if OPAL_ENABLE_FT == 1
    /* checkpoint bookkeeping, only present in fault-tolerant builds */
    job->ckpt_state        = 0;
    job->ckpt_snapshot_ref = NULL;
    job->ckpt_snapshot_loc = NULL;
#endif
}
/*
 * Destructor for orte_job_t.
 *
 * Releases the first num_apps entries of the apps array and the first
 * num_procs entries of the procs array (skipping NULL slots), then the
 * arrays themselves, the map if one is attached, and, in
 * fault-tolerant builds, the checkpoint strings. bookmark and
 * aborted_proc are not released here.
 */
static void orte_job_destruct(orte_job_t* job)
{
    orte_std_cntr_t app_idx;
    orte_vpid_t proc_idx;

    /* application contexts */
    for (app_idx = 0; app_idx < job->num_apps; ++app_idx) {
        if (NULL == job->apps->addr[app_idx]) {
            continue;
        }
        OBJ_RELEASE(job->apps->addr[app_idx]);
    }
    OBJ_RELEASE(job->apps);

    /* processes */
    for (proc_idx = 0; proc_idx < job->num_procs; ++proc_idx) {
        if (NULL == job->procs->addr[proc_idx]) {
            continue;
        }
        OBJ_RELEASE(job->procs->addr[proc_idx]);
    }
    OBJ_RELEASE(job->procs);

    /* job map, if any */
    if (job->map != NULL) {
        OBJ_RELEASE(job->map);
    }

#if OPAL_ENABLE_FT == 1
    if (job->ckpt_snapshot_ref != NULL) {
        free(job->ckpt_snapshot_ref);
    }
    if (job->ckpt_snapshot_loc != NULL) {
        free(job->ckpt_snapshot_loc);
    }
#endif
}
/* Class instantiation for orte_job_t: parent class opal_list_item_t,
 * constructor orte_job_construct, destructor orte_job_destruct. */
OBJ_CLASS_INSTANCE(orte_job_t,
opal_list_item_t,
orte_job_construct,
orte_job_destruct);
/*
 * Constructor for orte_node_t.
 *
 * Allocates the node's procs pointer array and sets every other field
 * to its initial value. The results of OBJ_NEW /
 * opal_pointer_array_init are not checked.
 */
static void orte_node_construct(orte_node_t* node)
{
    /* identity */
    node->name      = NULL;
    node->index     = -1;
    node->launch_id = -1;
    node->username  = NULL;
    node->arch      = 0;
    node->state     = ORTE_NODE_STATE_UNKNOWN;

    /* daemon */
    node->daemon          = NULL;
    node->daemon_launched = false;

    /* processes on this node */
    node->num_procs = 0;
    node->procs = OBJ_NEW(opal_pointer_array_t);
    opal_pointer_array_init(node->procs,
                            ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
                            ORTE_GLOBAL_ARRAY_MAX_SIZE,
                            ORTE_GLOBAL_ARRAY_BLOCK_SIZE);
    node->next_node_rank = 0;
    node->oversubscribed = false;

    /* slot accounting */
    node->slots       = 0;
    node->slots_inuse = 0;
    node->slots_alloc = 0;
    node->slots_max   = 0;
    node->slot_list   = NULL;
}
/*
 * Destructor for orte_node_t.
 *
 * Frees the name and username strings, releases the daemon object if
 * one is attached, releases the first num_procs entries of the procs
 * array (skipping NULL slots), and finally releases the array itself.
 *
 * NOTE(review): slot_list is set to NULL by orte_node_construct but is
 * not freed here - confirm whether the node owns that string.
 */
static void orte_node_destruct(orte_node_t* node)
{
    orte_vpid_t proc_idx;

    if (node->name != NULL) {
        free(node->name);
    }

    if (node->daemon != NULL) {
        OBJ_RELEASE(node->daemon);
    }

    for (proc_idx = 0; proc_idx < node->num_procs; ++proc_idx) {
        if (NULL == node->procs->addr[proc_idx]) {
            continue;
        }
        OBJ_RELEASE(node->procs->addr[proc_idx]);
    }
    OBJ_RELEASE(node->procs);

    if (node->username != NULL) {
        free(node->username);
    }
}
/* Class instantiation for orte_node_t: parent class opal_list_item_t,
 * constructor orte_node_construct, destructor orte_node_destruct. */
OBJ_CLASS_INSTANCE(orte_node_t,
opal_list_item_t,
orte_node_construct,
orte_node_destruct);
/*
 * Constructor for orte_proc_t.
 *
 * Sets the name to the invalid name, both ranks to UINT8_MAX, the app
 * index to -1, and every pointer member to NULL.
 */
static void orte_proc_construct(orte_proc_t* proc)
{
    /* identity */
    proc->name    = *ORTE_NAME_INVALID;
    proc->pid     = 0;
    proc->app_idx = -1;
    proc->rml_uri = NULL;

    /* ranks start at UINT8_MAX */
    proc->local_rank = UINT8_MAX;
    proc->node_rank  = UINT8_MAX;

    /* placement */
    proc->node      = NULL;
    proc->nodename  = NULL;
    proc->slot_list = NULL;

    /* state */
    proc->state = ORTE_PROC_STATE_UNDEF;
    proc->beat  = 0;

#if OPAL_ENABLE_FT == 1
    /* checkpoint bookkeeping, only present in fault-tolerant builds */
    proc->ckpt_state        = 0;
    proc->ckpt_snapshot_ref = NULL;
    proc->ckpt_snapshot_loc = NULL;
#endif
}
/*
 * Destructor for orte_proc_t.
 *
 * Frees slot_list and rml_uri, releases the reference to the node
 * object, and, in fault-tolerant builds, frees the checkpoint strings.
 */
static void orte_proc_destruct(orte_proc_t* proc)
{
    /* nodename is deliberately NOT freed: it only points at a field
     * of the associated node object, and the node object frees it.
     */

    if (proc->slot_list != NULL) {
        free(proc->slot_list);
    }

    if (proc->node != NULL) {
        OBJ_RELEASE(proc->node);
    }

    if (proc->rml_uri != NULL) {
        free(proc->rml_uri);
    }

#if OPAL_ENABLE_FT == 1
    if (proc->ckpt_snapshot_ref != NULL) {
        free(proc->ckpt_snapshot_ref);
    }
    if (proc->ckpt_snapshot_loc != NULL) {
        free(proc->ckpt_snapshot_loc);
    }
#endif
}
/* Class instantiation for orte_proc_t: parent class opal_list_item_t,
 * constructor orte_proc_construct, destructor orte_proc_destruct. */
OBJ_CLASS_INSTANCE(orte_proc_t,
opal_list_item_t,
orte_proc_construct,
orte_proc_destruct);
/*
 * Constructor for orte_nid_t.
 *
 * Leaves the name unset, marks the daemon vpid as invalid, and copies
 * the arch value from orte_process_info.
 */
static void orte_nid_construct(orte_nid_t *ptr)
{
    ptr->arch   = orte_process_info.arch;
    ptr->daemon = ORTE_VPID_INVALID;
    ptr->name   = NULL;
}
/*
 * Destructor for orte_nid_t: frees the name string when one is set.
 */
static void orte_nid_destruct(orte_nid_t *ptr)
{
    if (NULL == ptr->name) {
        return;
    }
    free(ptr->name);
}
/* Class instantiation for orte_nid_t: parent class opal_object_t,
 * constructor orte_nid_construct, destructor orte_nid_destruct. */
OBJ_CLASS_INSTANCE(orte_nid_t,
opal_object_t,
orte_nid_construct,
orte_nid_destruct);
/*
 * Constructor for orte_pmap_t: node is set to -1 and both ranks
 * start at zero.
 */
static void orte_pmap_construct(orte_pmap_t *ptr)
{
    ptr->node_rank  = 0;
    ptr->local_rank = 0;
    ptr->node       = -1;
}
/* Class instantiation for orte_pmap_t: parent class opal_object_t,
 * constructor orte_pmap_construct, and NULL passed in the destructor
 * position. */
OBJ_CLASS_INSTANCE(orte_pmap_t,
opal_object_t,
orte_pmap_construct,
NULL);
/*
 * Constructor for orte_jmap_t.
 *
 * Marks the job id as invalid and sets up the embedded pmap value
 * array with an element size of sizeof(orte_pmap_t).
 */
static void orte_jmap_construct(orte_jmap_t *ptr)
{
    ptr->job = ORTE_JOBID_INVALID;

    /* the array is embedded in the struct, so it is constructed in
     * place before being initialised */
    OBJ_CONSTRUCT(&ptr->pmap, opal_value_array_t);
    opal_value_array_init(&ptr->pmap, sizeof(orte_pmap_t));
}
/*
 * Destructor for orte_jmap_t: destructs the embedded pmap value array
 * that orte_jmap_construct set up.
 */
static void orte_jmap_destruct(orte_jmap_t *ptr)
{
    OBJ_DESTRUCT(&ptr->pmap);
}
/* Class instantiation for orte_jmap_t: parent class opal_object_t,
 * constructor orte_jmap_construct, destructor orte_jmap_destruct. */
OBJ_CLASS_INSTANCE(orte_jmap_t,
opal_object_t,
orte_jmap_construct,
orte_jmap_destruct);
/*
 * Constructor for orte_job_map_t.
 *
 * Sets the mapping defaults (byslot policy, oversubscription allowed),
 * clears the remaining flags and counters, and allocates the nodes
 * pointer array. The results of OBJ_NEW / opal_pointer_array_init are
 * not checked.
 */
static void orte_job_map_construct(orte_job_map_t* map)
{
    /* mapping defaults */
    map->policy        = ORTE_RMAPS_BYSLOT;  /* default to byslot mapping as per orterun options */
    map->oversubscribe = true;               /* default to allowing oversubscribe */
    map->pernode       = false;
    map->npernode      = 0;

    /* display / cpu-list flags */
    map->display_map = false;
    map->cpu_lists   = false;

    /* daemons to be launched for this map */
    map->num_new_daemons   = 0;
    map->daemon_vpid_start = ORTE_VPID_INVALID;

    /* nodes in the map */
    map->num_nodes = 0;
    map->nodes = OBJ_NEW(opal_pointer_array_t);
    opal_pointer_array_init(map->nodes,
                            ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
                            ORTE_GLOBAL_ARRAY_MAX_SIZE,
                            ORTE_GLOBAL_ARRAY_BLOCK_SIZE);
}
/*
 * Destructor for orte_job_map_t.
 *
 * Walks every slot of the nodes array (up to its size field),
 * releasing each non-NULL entry, then releases the array itself.
 */
static void orte_job_map_destruct(orte_job_map_t* map)
{
    orte_std_cntr_t slot;

    for (slot = 0; slot < map->nodes->size; ++slot) {
        if (NULL == map->nodes->addr[slot]) {
            continue;
        }
        OBJ_RELEASE(map->nodes->addr[slot]);
    }

    OBJ_RELEASE(map->nodes);
}
/* Class instantiation for orte_job_map_t: parent class opal_object_t,
 * constructor orte_job_map_construct, destructor orte_job_map_destruct. */
OBJ_CLASS_INSTANCE(orte_job_map_t,
opal_object_t,
orte_job_map_construct,
orte_job_map_destruct);
END_C_DECLS
#endif /* ORTE_RUNTIME_GLOBAL_CLASS_INSTANCES_H_ */