2006-09-14 21:29:51 +00:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
/** @file:
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef ORTE_MCA_RMAPS_PRIVATE_H
|
|
|
|
#define ORTE_MCA_RMAPS_PRIVATE_H
|
|
|
|
|
|
|
|
/*
|
|
|
|
* includes
|
|
|
|
*/
|
|
|
|
#include "orte_config.h"
|
|
|
|
#include "orte/orte_constants.h"
|
|
|
|
|
|
|
|
#include "orte/mca/ns/ns_types.h"
|
|
|
|
#include "orte/mca/gpr/gpr_types.h"
|
|
|
|
#include "orte/mca/rml/rml_types.h"
|
2006-10-07 15:45:24 +00:00
|
|
|
#include "orte/mca/ras/ras_types.h"
|
2006-09-14 21:29:51 +00:00
|
|
|
|
|
|
|
#include "orte/mca/rmaps/rmaps.h"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Functions for use solely within the RMAPS framework
|
|
|
|
*/
|
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Define the RMAPS command flag */
|
|
|
|
typedef uint8_t orte_rmaps_cmd_flag_t;
|
|
|
|
#define ORTE_RMAPS_CMD ORTE_UINT8
|
|
|
|
|
|
|
|
/* define some commands */
|
|
|
|
#define ORTE_RMAPS_MAP_CMD 0x01
|
|
|
|
|
|
|
|
/*
|
|
|
|
* RMAPS component/module/priority tuple
|
|
|
|
*/
|
|
|
|
/**
 * List entry that ties together one RMAPS component, the module associated
 * with it, and an integer priority for that component.
 * NOTE(review): presumably used to keep the available mappers on an
 * opal_list_t ordered by priority - confirm against the framework's
 * open/select code, which is not visible in this header.
 */
struct orte_rmaps_base_cmp_t {
|
|
|
|
/** Base object: an opal_list_item_t placed as the first member
 *  (presumably so the entry can be linked onto an opal_list_t - confirm) */
|
|
|
|
opal_list_item_t super;
|
|
|
|
/** Pointer to the rmaps component this entry describes */
|
|
|
|
orte_rmaps_base_component_t *component;
|
|
|
|
/** Pointer to the rmaps module that accompanies the component */
|
|
|
|
orte_rmaps_base_module_t* module;
|
|
|
|
/** This component's priority (ordering convention is not shown here) */
|
|
|
|
int priority;
|
|
|
|
};
|
|
|
|
/* Convenience typedef */
|
|
|
|
typedef struct orte_rmaps_base_cmp_t orte_rmaps_base_cmp_t;
|
|
|
|
/* Class declaration */
|
2006-10-05 05:22:22 +00:00
|
|
|
OBJ_CLASS_DECLARATION(orte_rmaps_base_cmp_t);
|
2006-09-14 21:29:51 +00:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
2006-10-07 15:45:24 +00:00
|
|
|
* Base API functions
|
2006-09-14 21:29:51 +00:00
|
|
|
*/
|
|
|
|
|
2006-10-07 15:45:24 +00:00
|
|
|
/*
|
|
|
|
* Map a job
|
|
|
|
* All calls to rmaps.map_job are routed through this function. This allows callers of
|
|
|
|
* the RMAPS framework to specify the particular mapper they wish to use.
|
|
|
|
*/
|
2006-10-18 14:01:44 +00:00
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_map_job(orte_jobid_t job, opal_list_t *attributes);
|
2006-10-07 15:45:24 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Get job map
|
|
|
|
* Retrieve the information for a job map from the registry and reassemble it into
|
|
|
|
* a job_map object. Memory for the job_map object and all of its elements is
|
|
|
|
* allocated by the function
|
|
|
|
*/
|
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t job);
|
2006-09-14 21:29:51 +00:00
|
|
|
|
|
|
|
/*
|
2006-10-07 15:45:24 +00:00
|
|
|
* Get node map
|
|
|
|
* Retrieve the information for a job map from the registry and provide the info
|
|
|
|
* for the specified node
|
2006-09-14 21:29:51 +00:00
|
|
|
*/
|
2006-10-07 15:45:24 +00:00
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell,
|
|
|
|
char *nodename, orte_jobid_t job);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Registry functions for maps
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* Put job map
|
|
|
|
* Given a pointer to an orte_job_map_t, place the map's information on
|
|
|
|
* the registry. Info is entered into the containers for each individual process on
|
|
|
|
* the job's segment. Additionally, the function sets the INIT counter to the number
|
|
|
|
* of processes in the map, thus causing the INIT trigger to fire so that any
|
|
|
|
* attached subscriptions can be serviced.
|
|
|
|
*/
|
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_put_job_map(orte_job_map_t *map);
|
|
|
|
|
2006-09-14 21:29:51 +00:00
|
|
|
|
2006-10-18 20:02:16 +00:00
|
|
|
/*
|
|
|
|
* Store a mapping plan
|
|
|
|
* Given a list of attributes, this function stores all the RMAPS-specific
|
|
|
|
* attributes on the registry for later use - e.g., by a child job that
|
|
|
|
* wants to be mapped in a fashion identical to that of its parent
|
|
|
|
*/
|
|
|
|
int orte_rmaps_base_store_mapping_plan(orte_jobid_t job, opal_list_t *attrs);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get a mapping plan
|
|
|
|
* Given a jobid, retrieve the stored mapping plan for that job. The
|
|
|
|
* RMAPS-specific attributes will UPDATE the provided list to avoid
|
|
|
|
* the possibility of duplicate list entries. Any existing RMAPS-specific
|
|
|
|
* entries on the provided list will, therefore, be OVERWRITTEN.
|
|
|
|
*/
|
|
|
|
int orte_rmaps_base_get_mapping_plan(orte_jobid_t job, opal_list_t *attrs);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update the mapping state
|
|
|
|
* Dynamically spawned child jobs that share resources with their parent
|
|
|
|
* need to know where the parent job stopped mapping so they can pick up
|
|
|
|
* from the right place. Once the child is mapped, however, we need to update
|
|
|
|
* that info for the *parent* so that any additional children can have the
|
|
|
|
* right info.
|
|
|
|
*/
|
|
|
|
int orte_rmaps_base_update_mapping_state(orte_jobid_t parent_job,
|
|
|
|
opal_list_t *attrs);
|
|
|
|
|
|
|
|
|
2006-09-14 21:29:51 +00:00
|
|
|
/*
|
|
|
|
* communication functions
|
|
|
|
*/
|
|
|
|
int orte_rmaps_base_comm_start(void);
|
|
|
|
int orte_rmaps_base_comm_stop(void);
|
|
|
|
void orte_rmaps_base_recv(int status, orte_process_name_t* sender,
|
|
|
|
orte_buffer_t* buffer, orte_rml_tag_t tag,
|
|
|
|
void* cbdata);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Internal support functions
|
|
|
|
*/
|
2006-10-07 15:45:24 +00:00
|
|
|
/*
|
|
|
|
* Function to add a mapped_proc entry to a map
|
|
|
|
* Scans list of nodes on map to see if the specified one already
|
|
|
|
* exists - if so, just add this entry to that node's list of
|
|
|
|
* procs. If not, then add new node entry and put this proc
|
|
|
|
* on its list.
|
|
|
|
*/
|
2007-02-09 15:06:45 +00:00
|
|
|
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename, int32_t launch_id,
|
2006-10-07 15:45:24 +00:00
|
|
|
char *username, bool oversubscribed, orte_mapped_proc_t *proc);
|
|
|
|
|
2006-10-18 14:01:44 +00:00
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid,
|
|
|
|
orte_std_cntr_t *total_num_slots, bool no_use_local);
|
2006-10-10 04:54:51 +00:00
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_update_node_usage(opal_list_t *nodes);
|
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
|
|
|
|
orte_app_context_t *app,
|
|
|
|
opal_list_t *master_node_list,
|
|
|
|
orte_std_cntr_t *total_num_slots);
|
|
|
|
|
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_claim_slot(orte_job_map_t *map,
|
|
|
|
orte_ras_node_t *current_node,
|
|
|
|
orte_jobid_t jobid, orte_vpid_t vpid,
|
|
|
|
orte_std_cntr_t app_idx,
|
|
|
|
opal_list_t *nodes,
|
2006-10-18 14:01:44 +00:00
|
|
|
opal_list_t *fully_used_nodes,
|
|
|
|
bool oversubscribe);
|
2006-09-14 21:29:51 +00:00
|
|
|
|
2006-11-17 19:06:10 +00:00
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_proxy_map_job(orte_jobid_t job, opal_list_t *attributes);
|
|
|
|
|
Compute and pass the local_rank and local number of procs (in that proc's job) on the node.
To be precise, given this hypothetical launching pattern:
host1: vpids 0, 2, 4, 6
host2: vpids 1, 3, 5, 7
The local_rank for these procs would be:
host1: vpids 0->local_rank 0, v2->lr1, v4->lr2, v6->lr3
host2: vpids 1->local_rank 0, v3->lr1, v5->lr2, v7->lr3
and the number of local procs on each node would be four. If vpid=0 then does a comm_spawn of one process on host1, the values of the parent job would remain unchanged. The local_rank of the child process would be 0 and its num_local_procs would be 1 since it is in a separate jobid.
I have verified this functionality for the rsh case - need to verify that slurm and other cases also get the right values. Some consolidation of common code is probably going to occur in the SDS components to make this simpler and more maintainable in the future.
This commit was SVN r14706.
2007-05-21 14:30:10 +00:00
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_compute_usage(orte_job_map_t *map, orte_std_cntr_t num_procs);
|
|
|
|
|
Bring in the generalized xcast communication system along with the correspondingly revised orted launch. I will send a message out to developers explaining the basic changes. In brief:
1. generalize orte_rml.xcast to become a general broadcast-like messaging system. Messages can now be sent to any tag on the daemons or processes. Note that any message sent via xcast will be delivered to ALL processes in the specified job - you don't get to pick and choose. At a later date, we will introduce an augmented capability that will use the daemons as relays, but will allow you to send to a specified array of process names.
2. extended orte_rml.xcast so it supports more scalable message routing methodologies. At the moment, we support three: (a) direct, which sends the message directly to all recipients; (b) linear, which sends the message to the local daemon on each node, which then relays it to its own local procs; and (c) binomial, which sends the message via a binomial algo across all the daemons, each of which then relays to its own local procs. The crossover points between the algos are adjustable via MCA param, or you can simply demand that a specific algo be used.
3. orteds no longer exhibit two types of behavior: bootproxy or VM. Orteds now always behave like they are part of a virtual machine - they simply launch a job if mpirun tells them to do so. This is another step towards creating an "orteboot" functionality, but also provided a clean system for supporting message relaying.
Note one major impact of this commit: multiple daemons on a node cannot be supported any longer! Only a single daemon/node is now allowed.
This commit is known to break support for the following environments: POE, Xgrid, Xcpu, Windows. It has been tested on rsh, SLURM, and Bproc. Modifications for TM support have been made but could not be verified due to machine problems at LANL. Modifications for SGE have been made but could not be verified. The developers for the non-verified environments will be separately notified along with suggestions on how to fix the problems.
This commit was SVN r15007.
2007-06-12 13:28:54 +00:00
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_define_daemons(orte_job_map_t *map);
|
Compute and pass the local_rank and local number of procs (in that proc's job) on the node.
To be precise, given this hypothetical launching pattern:
host1: vpids 0, 2, 4, 6
host2: vpids 1, 3, 5, 7
The local_rank for these procs would be:
host1: vpids 0->local_rank 0, v2->lr1, v4->lr2, v6->lr3
host2: vpids 1->local_rank 0, v3->lr1, v5->lr2, v7->lr3
and the number of local procs on each node would be four. If vpid=0 then does a comm_spawn of one process on host1, the values of the parent job would remain unchanged. The local_rank of the child process would be 0 and its num_local_procs would be 1 since it is in a separate jobid.
I have verified this functionality for the rsh case - need to verify that slurm and other cases also get the right values. Some consolidation of common code is probably going to occur in the SDS components to make this simpler and more maintainable in the future.
This commit was SVN r14706.
2007-05-21 14:30:10 +00:00
|
|
|
|
2006-10-03 17:40:00 +00:00
|
|
|
/** Local data type functions */
|
|
|
|
void orte_rmaps_base_std_obj_release(orte_data_value_t *value);
|
|
|
|
|
|
|
|
/* JOB_MAP */
|
2006-10-07 15:45:24 +00:00
|
|
|
int orte_rmaps_base_copy_map(orte_job_map_t **dest, orte_job_map_t *src, orte_data_type_t type);
|
|
|
|
int orte_rmaps_base_compare_map(orte_job_map_t *value1, orte_job_map_t *value2, orte_data_type_t type);
|
2006-10-03 17:40:00 +00:00
|
|
|
int orte_rmaps_base_pack_map(orte_buffer_t *buffer, void *src,
|
|
|
|
orte_std_cntr_t num_vals, orte_data_type_t type);
|
2006-10-07 15:45:24 +00:00
|
|
|
int orte_rmaps_base_print_map(char **output, char *prefix, orte_job_map_t *src, orte_data_type_t type);
|
|
|
|
int orte_rmaps_base_size_map(size_t *size, orte_job_map_t *src, orte_data_type_t type);
|
2006-10-03 17:40:00 +00:00
|
|
|
int orte_rmaps_base_unpack_map(orte_buffer_t *buffer, void *dest,
|
|
|
|
orte_std_cntr_t *num_vals, orte_data_type_t type);
|
|
|
|
|
|
|
|
/* MAPPED_PROC */
|
2006-10-07 15:45:24 +00:00
|
|
|
int orte_rmaps_base_copy_mapped_proc(orte_mapped_proc_t **dest, orte_mapped_proc_t *src, orte_data_type_t type);
|
|
|
|
int orte_rmaps_base_compare_mapped_proc(orte_mapped_proc_t *value1, orte_mapped_proc_t *value2, orte_data_type_t type);
|
2006-10-03 17:40:00 +00:00
|
|
|
int orte_rmaps_base_pack_mapped_proc(orte_buffer_t *buffer, void *src,
|
|
|
|
orte_std_cntr_t num_vals, orte_data_type_t type);
|
2006-10-07 15:45:24 +00:00
|
|
|
int orte_rmaps_base_print_mapped_proc(char **output, char *prefix, orte_mapped_proc_t *src, orte_data_type_t type);
|
|
|
|
int orte_rmaps_base_size_mapped_proc(size_t *size, orte_mapped_proc_t *src, orte_data_type_t type);
|
2006-10-03 17:40:00 +00:00
|
|
|
int orte_rmaps_base_unpack_mapped_proc(orte_buffer_t *buffer, void *dest,
|
|
|
|
orte_std_cntr_t *num_vals, orte_data_type_t type);
|
|
|
|
|
|
|
|
/* MAPPED_NODE */
|
2006-10-07 15:45:24 +00:00
|
|
|
int orte_rmaps_base_copy_mapped_node(orte_mapped_node_t **dest, orte_mapped_node_t *src, orte_data_type_t type);
|
|
|
|
int orte_rmaps_base_compare_mapped_node(orte_mapped_node_t *value1, orte_mapped_node_t *value2, orte_data_type_t type);
|
2006-10-03 17:40:00 +00:00
|
|
|
int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
|
|
|
|
orte_std_cntr_t num_vals, orte_data_type_t type);
|
2006-10-07 15:45:24 +00:00
|
|
|
int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_node_t *src, orte_data_type_t type);
|
|
|
|
int orte_rmaps_base_size_mapped_node(size_t *size, orte_mapped_node_t *src, orte_data_type_t type);
|
2006-10-03 17:40:00 +00:00
|
|
|
int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
|
|
|
|
orte_std_cntr_t *num_vals, orte_data_type_t type);
|
|
|
|
|
2006-09-14 21:29:51 +00:00
|
|
|
/*
|
|
|
|
* external API functions will be documented in the mca/rmaps/rmaps.h file
|
|
|
|
*/
|
|
|
|
|
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif
|