2006-09-14 21:29:51 +00:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
/** @file:
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef ORTE_MCA_RMAPS_PRIVATE_H
|
|
|
|
#define ORTE_MCA_RMAPS_PRIVATE_H
|
|
|
|
|
|
|
|
/*
|
|
|
|
* includes
|
|
|
|
*/
|
|
|
|
#include "orte_config.h"
|
2008-02-28 01:57:57 +00:00
|
|
|
#include "orte/types.h"
|
2006-09-14 21:29:51 +00:00
|
|
|
|
2008-02-28 01:57:57 +00:00
|
|
|
#include "orte/runtime/orte_globals.h"
|
2006-09-14 21:29:51 +00:00
|
|
|
|
|
|
|
#include "orte/mca/rmaps/rmaps.h"
|
|
|
|
|
2008-02-28 01:57:57 +00:00
|
|
|
BEGIN_C_DECLS
|
2006-09-14 21:29:51 +00:00
|
|
|
|
|
|
|
/*
|
2006-10-07 15:45:24 +00:00
|
|
|
* Base API functions
|
2006-09-14 21:29:51 +00:00
|
|
|
*/
|
|
|
|
|
2006-10-07 15:45:24 +00:00
|
|
|
/*
|
|
|
|
* Map a job
|
|
|
|
* All calls to rmaps.map_job are routed through this function. This allows callers to
|
|
|
|
* the RMAPS framework to specify the particular mapper they wish to use.
|
|
|
|
*/
|
2008-02-28 01:57:57 +00:00
|
|
|
/*
 * jdata: the job to be mapped.
 * Returns an int status.
 * NOTE(review): presumably ORTE_SUCCESS or an ORTE error code; the
 * implementation is not visible from this header -- confirm.
 */
ORTE_DECLSPEC int orte_rmaps_base_map_job(orte_job_t *jdata);
|
|
|
|
/*
 * Takes a jobid and returns a pointer to an orte_job_map_t.
 * NOTE(review): presumably looks up the map of the job identified by
 * `job`; who owns the returned pointer and what is returned for an
 * unknown job cannot be told from this header -- confirm.
 */
ORTE_DECLSPEC orte_job_map_t *orte_rmaps_base_get_job_map(orte_jobid_t job);
|
2006-09-14 21:29:51 +00:00
|
|
|
|
2006-10-07 15:45:24 +00:00
|
|
|
|
2008-02-28 01:57:57 +00:00
|
|
|
/* LOCAL FUNCTIONS for use by RMAPS components */
|
2006-09-14 21:29:51 +00:00
|
|
|
|
2006-10-07 15:45:24 +00:00
|
|
|
/*
|
|
|
|
* Function to add a mapped_proc entry to a map
|
|
|
|
* Scans list of nodes on map to see if the specified one already
|
|
|
|
* exists - if so, just add this entry to that node's list of
|
|
|
|
* procs. If not, then add new node entry and put this proc
|
|
|
|
* on its list.
|
|
|
|
*/
|
2008-02-28 01:57:57 +00:00
|
|
|
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_node_t *node,
|
|
|
|
bool oversubscribed, orte_proc_t *proc);
|
2006-10-07 15:45:24 +00:00
|
|
|
|
2008-02-28 01:57:57 +00:00
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_get_target_nodes(opal_list_t* node_list,
|
|
|
|
orte_std_cntr_t *total_num_slots,
|
|
|
|
orte_app_context_t *app,
|
2008-04-09 22:10:53 +00:00
|
|
|
uint8_t policy);
|
2007-07-14 15:14:07 +00:00
|
|
|
/*
 * procs: list (opal_list_t) handed to the function.
 * NOTE(review): whether the list is an input, an output, or both is not
 * visible from this header; return value is presumably an ORTE status
 * code -- confirm.
 */
ORTE_DECLSPEC int orte_rmaps_base_get_target_procs(opal_list_t *procs);
|
|
|
|
|
2006-10-10 04:54:51 +00:00
|
|
|
/*
 * nodes: list (opal_list_t) of nodes.
 * NOTE(review): presumably records updated usage for each node on the
 * list; where the update is stored is not visible here -- confirm.
 */
ORTE_DECLSPEC int orte_rmaps_base_update_node_usage(opal_list_t *nodes);
|
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
|
|
|
|
orte_app_context_t *app,
|
|
|
|
opal_list_t *master_node_list,
|
|
|
|
orte_std_cntr_t *total_num_slots);
|
|
|
|
|
2008-02-28 01:57:57 +00:00
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_claim_slot(orte_job_t *jdata,
|
|
|
|
orte_node_t *current_node,
|
|
|
|
orte_vpid_t vpid,
|
2009-04-29 02:13:14 +00:00
|
|
|
char *slot_list,
|
2006-10-10 04:54:51 +00:00
|
|
|
orte_std_cntr_t app_idx,
|
|
|
|
opal_list_t *nodes,
|
2008-04-17 13:50:59 +00:00
|
|
|
bool oversubscribe,
|
|
|
|
bool remove_from_list);
|
2006-09-14 21:29:51 +00:00
|
|
|
|
2008-02-28 01:57:57 +00:00
|
|
|
/*
 * jdata: job whose usage is computed.
 * NOTE(review): what exactly is computed, and where the result is
 * stored, is not visible from this header -- confirm against the
 * implementation. Returns an int status.
 */
ORTE_DECLSPEC int orte_rmaps_base_compute_usage(orte_job_t *jdata);
|
Compute and pass the local_rank and local number of procs (in that proc's job) on the node.
To be precise, given this hypothetical launching pattern:
host1: vpids 0, 2, 4, 6
host2: vpids 1, 3, 5, 7
The local_rank for these procs would be:
host1: vpids 0->local_rank 0, v2->lr1, v4->lr2, v6->lr3
host2: vpids 1->local_rank 0, v3->lr1, v5->lr2, v7->lr3
and the number of local procs on each node would be four. If vpid=0 then does a comm_spawn of one process on host1, the values of the parent job would remain unchanged. The local_rank of the child process would be 0 and its num_local_procs would be 1 since it is in a separate jobid.
I have verified this functionality for the rsh case - need to verify that slurm and other cases also get the right values. Some consolidation of common code is probably going to occur in the SDS components to make this simpler and more maintainable in the future.
This commit was SVN r14706.
2007-05-21 14:30:10 +00:00
|
|
|
|
2009-06-06 01:08:47 +00:00
|
|
|
/*
 * jdata:   job being updated
 * oldnode: previous node
 * newnode: new node
 * newproc: new proc
 *
 * NOTE(review): presumably adjusts usage bookkeeping when a proc moves
 * from oldnode to newnode -- confirm against the implementation.
 * No status is returned.
 */
ORTE_DECLSPEC void orte_rmaps_base_update_usage(
    orte_job_t *jdata,
    orte_node_t *oldnode,
    orte_node_t *newnode,
    orte_proc_t *newproc);
|
|
|
|
|
2007-07-14 15:14:07 +00:00
|
|
|
/*
 * app:   application context
 * map:   job map to rearrange
 * procs: list of procs
 *
 * NOTE(review): how `procs` drives the rearrangement of `map` is not
 * visible from this header -- confirm against the implementation.
 */
ORTE_DECLSPEC int orte_rmaps_base_rearrange_map(
    orte_app_context_t *app,
    orte_job_map_t *map,
    opal_list_t *procs);
|
|
|
|
|
Bring in the generalized xcast communication system along with the correspondingly revised orted launch. I will send a message out to developers explaining the basic changes. In brief:
1. generalize orte_rml.xcast to become a general broadcast-like messaging system. Messages can now be sent to any tag on the daemons or processes. Note that any message sent via xcast will be delivered to ALL processes in the specified job - you don't get to pick and choose. At a later date, we will introduce an augmented capability that will use the daemons as relays, but will allow you to send to a specified array of process names.
2. extended orte_rml.xcast so it supports more scalable message routing methodologies. At the moment, we support three: (a) direct, which sends the message directly to all recipients; (b) linear, which sends the message to the local daemon on each node, which then relays it to its own local procs; and (c) binomial, which sends the message via a binomial algo across all the daemons, each of which then relays to its own local procs. The crossover points between the algos are adjustable via MCA param, or you can simply demand that a specific algo be used.
3. orteds no longer exhibit two types of behavior: bootproxy or VM. Orteds now always behave like they are part of a virtual machine - they simply launch a job if mpirun tells them to do so. This is another step towards creating an "orteboot" functionality, but also provided a clean system for supporting message relaying.
Note one major impact of this commit: multiple daemons on a node cannot be supported any longer! Only a single daemon/node is now allowed.
This commit is known to break support for the following environments: POE, Xgrid, Xcpu, Windows. It has been tested on rsh, SLURM, and Bproc. Modifications for TM support have been made but could not be verified due to machine problems at LANL. Modifications for SGE have been made but could not be verified. The developers for the non-verified environments will be separately notified along with suggestions on how to fix the problems.
This commit was SVN r15007.
2007-06-12 13:28:54 +00:00
|
|
|
/*
 * map: job map the daemons are defined for.
 * NOTE(review): presumably ensures a daemon is defined for each node on
 * the map; not verifiable from this header -- confirm. Returns an int
 * status.
 */
ORTE_DECLSPEC int orte_rmaps_base_define_daemons(orte_job_map_t *map);
|
Compute and pass the local_rank and local number of procs (in that proc's job) on the node.
To be precise, given this hypothetical launching pattern:
host1: vpids 0, 2, 4, 6
host2: vpids 1, 3, 5, 7
The local_rank for these procs would be:
host1: vpids 0->local_rank 0, v2->lr1, v4->lr2, v6->lr3
host2: vpids 1->local_rank 0, v3->lr1, v5->lr2, v7->lr3
and the number of local procs on each node would be four. If vpid=0 then does a comm_spawn of one process on host1, the values of the parent job would remain unchanged. The local_rank of the child process would be 0 and its num_local_procs would be 1 since it is in a separate jobid.
I have verified this functionality for the rsh case - need to verify that slurm and other cases also get the right values. Some consolidation of common code is probably going to occur in the SDS components to make this simpler and more maintainable in the future.
This commit was SVN r14706.
2007-05-21 14:30:10 +00:00
|
|
|
|
2009-06-12 17:52:17 +00:00
|
|
|
|
|
|
|
/*
 * node_list: list of nodes
 * jdata:     job being mapped
 *
 * Returns a pointer to an opal_list_item_t.
 * NOTE(review): presumably the item of node_list at which mapping of
 * `jdata` should begin -- confirm against the implementation.
 */
ORTE_DECLSPEC opal_list_item_t *orte_rmaps_base_get_starting_point(
    opal_list_t *node_list,
    orte_job_t *jdata);
|
|
|
|
|
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_map_byslot(orte_job_t *jdata, orte_app_context_t *app,
|
|
|
|
opal_list_t *node_list, orte_vpid_t num_procs,
|
|
|
|
orte_vpid_t vpid_start, opal_list_item_t *cur_node_item,
|
|
|
|
orte_vpid_t ppn);
|
|
|
|
|
|
|
|
ORTE_DECLSPEC int orte_rmaps_base_map_bynode(orte_job_t *jdata, orte_app_context_t *app,
|
|
|
|
opal_list_t *node_list, orte_vpid_t num_procs,
|
|
|
|
orte_vpid_t vpid_start, opal_list_item_t *cur_node_item);
|
|
|
|
|
|
|
|
|
2008-02-28 01:57:57 +00:00
|
|
|
END_C_DECLS
|
2006-10-03 17:40:00 +00:00
|
|
|
|
2006-09-14 21:29:51 +00:00
|
|
|
#endif
|