/* Snapshot metadata: commit 95019cc310 (SVN r27210); 1904 lines, 62 KiB, C. */
/*
|
|
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
|
*
|
|
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "orte_config.h"
|
|
#include "orte/constants.h"
|
|
#include "orte/types.h"
|
|
|
|
#include <errno.h>
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif /* HAVE_UNISTD_H */
|
|
#ifdef HAVE_STRING_H
|
|
#include <string.h>
|
|
#endif /* HAVE_STRING_H */
|
|
|
|
#include "opal/mca/base/mca_base_param.h"
|
|
#include "opal/mca/hwloc/hwloc.h"
|
|
|
|
#include "opal/util/argv.h"
|
|
#include "opal/class/opal_tree.h"
|
|
|
|
#include "orte/util/show_help.h"
|
|
#include "orte/util/error_strings.h"
|
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
#include "orte/mca/rmaps/base/rmaps_private.h"
|
|
#include "orte/mca/rmaps/base/base.h"
|
|
|
|
#include "orte/runtime/orte_globals.h"
|
|
|
|
#include "rmaps_lama.h"
|
|
|
|
#include MCA_timer_IMPLEMENTATION_HEADER
|
|
|
|
|
|
/*********************************
|
|
* Module setup
|
|
*********************************/
|
|
static int orte_rmaps_lama_map(orte_job_t *jdata);
|
|
orte_rmaps_base_module_t orte_rmaps_lama_module = {
|
|
orte_rmaps_lama_map
|
|
};
|
|
|
|
|
|
/*********************************
|
|
* Timer
|
|
*********************************/
|
|
/* Timer identifiers; these are the 'idx' values handed to the timer macros. */
#define RMAPS_LAMA_TIMER_TOTAL           0
#define RMAPS_LAMA_TIMER_PARSE_PARAMS    1
#define RMAPS_LAMA_TIMER_BUILD_MAX_TREE  2
#define RMAPS_LAMA_TIMER_MAPPING         3
#define RMAPS_LAMA_TIMER_ORDERING        4
/* Number of timer identifiers; used as the length of the arrays below. */
#define RMAPS_LAMA_TIMER_MAX             5

/* Timer helpers (forward declarations) */
static double rmaps_lama_get_time(void);
static void   rmaps_lama_set_time(int idx, bool is_start);
static void   rmaps_lama_display_all_timers(void);
static void   rmaps_lama_clear_timers(void);
static void   rmaps_lama_display_indv_timer_core(double diff, char *str);

/* Per-timer storage, one entry for each identifier above. */
static double timer_start[RMAPS_LAMA_TIMER_MAX];
static double timer_end[RMAPS_LAMA_TIMER_MAX];
static double timer_accum[RMAPS_LAMA_TIMER_MAX];
|
|
|
|
/*
 * Timer convenience macros.
 *
 * Each macro forwards to the matching helper only when
 * rmaps_lama_timing_enabled is set, and does nothing otherwise.  The bodies
 * are wrapped in do { } while (0) so that an invocation followed by ';'
 * behaves as exactly one statement (e.g. inside an unbraced if/else).
 */

/* Reset all timers. */
#define RMAPS_LAMA_CLEAR_TIMERS()                   \
    do {                                            \
        if( rmaps_lama_timing_enabled ) {           \
            rmaps_lama_clear_timers();              \
        }                                           \
    } while (0)

/* Record the start time of timer 'idx'. */
#define RMAPS_LAMA_START_TIMER(idx)                 \
    do {                                            \
        if( rmaps_lama_timing_enabled ) {           \
            rmaps_lama_set_time((idx), true);       \
        }                                           \
    } while (0)

/* Record the end time of timer 'idx'. */
#define RMAPS_LAMA_END_TIMER(idx)                   \
    do {                                            \
        if( rmaps_lama_timing_enabled ) {           \
            rmaps_lama_set_time((idx), false);      \
        }                                           \
    } while (0)

/* Print all timers. */
#define RMAPS_LAMA_DISPLAY_TIMERS()                 \
    do {                                            \
        if( rmaps_lama_timing_enabled ) {           \
            rmaps_lama_display_all_timers();        \
        }                                           \
    } while (0)
|
|
|
|
|
|
/*********************************
|
|
* Structures & Defines
|
|
*********************************/
|
|
/* Constructor / destructor for rmaps_lama_hwloc_user_t (forward declarations) */
static void rmaps_lama_hwloc_user_construct(rmaps_lama_hwloc_user_t *item);
static void rmaps_lama_hwloc_user_destruct(rmaps_lama_hwloc_user_t *item);

/* Register rmaps_lama_hwloc_user_t as a class derived from opal_object_t. */
OBJ_CLASS_INSTANCE(rmaps_lama_hwloc_user_t,
                   opal_object_t,
                   rmaps_lama_hwloc_user_construct,
                   rmaps_lama_hwloc_user_destruct);
|
|
|
|
|
|
/*********************************
|
|
* Globals
|
|
*********************************/
|
|
/*
|
|
* Mapping
|
|
*/
|
|
rmaps_lama_level_type_t *lama_mapping_layout = NULL;
|
|
static rmaps_lama_level_type_t *lama_mapping_layout_sort = NULL;
|
|
int lama_mapping_num_layouts = 0;
|
|
|
|
/*
|
|
* Binding
|
|
*/
|
|
rmaps_lama_level_type_t lama_binding_level = LAMA_LEVEL_UNKNOWN;
|
|
static int lama_binding_num_levels = 0;
|
|
|
|
/*
|
|
* MPPR
|
|
*/
|
|
rmaps_lama_level_info_t *lama_mppr_levels = NULL;
|
|
int lama_mppr_num_levels = 0;
|
|
|
|
/*
|
|
* Ordering
|
|
*/
|
|
static rmaps_lama_order_type_t lama_ordering = LAMA_ORDER_NATURAL;
|
|
|
|
/*
|
|
* Homogeneous system optimization
|
|
*/
|
|
bool lama_mppr_max_tree_homogeneous_system = false;
|
|
|
|
|
|
/*********************************
|
|
* Support Macros
|
|
*********************************/
|
|
|
|
|
|
/*********************************
|
|
* Support functions
|
|
*********************************/
|
|
/*
|
|
* Preprocess the command line arguments
|
|
*/
|
|
static int orte_rmaps_lama_process_params(orte_job_t *jdata);
|
|
|
|
/*
|
|
* Mapping Support:
|
|
* Core mapping function
|
|
*/
|
|
static int orte_rmaps_lama_map_core(orte_job_t *jdata);
|
|
|
|
/*
|
|
* Mapping Support:
|
|
* Recursive function for mapping process
|
|
*/
|
|
static int rmaps_lama_map_core_iter_level(orte_job_t *jdata,
|
|
orte_app_context_t *cur_app_context,
|
|
opal_list_t *node_list,
|
|
orte_node_t **cur_mach_ptr,
|
|
opal_tree_t *max_tree,
|
|
int cur_level,
|
|
int mach_level,
|
|
int **pu_idx_ref,
|
|
int **last_pu_idx_ref,
|
|
int *num_mapped,
|
|
int max_procs,
|
|
int *iter_passes);
|
|
|
|
/*
|
|
* Mapping Support:
|
|
* Access the next machine in the node list
|
|
*/
|
|
static orte_node_t* get_next_machine(orte_job_t *jdata, opal_list_t *node_list,
|
|
opal_list_item_t *cur_mach);
|
|
|
|
/*
|
|
* Mapping Support:
|
|
* Check the availability of the requested slot on the specified node
|
|
*/
|
|
static int check_node_availability(orte_node_t *cur_node,
|
|
opal_tree_t *max_tree,
|
|
int *pu_idx_ref,
|
|
char **slot_list);
|
|
|
|
/*
|
|
* Mapping Support:
|
|
* Debugging PU display
|
|
*/
|
|
static void display_pu_ref(int *ref, int size, int rank, orte_proc_t *proc);
|
|
static char * pu_ref_to_str(int *ref, int size);
|
|
|
|
/*
|
|
* Mapping Support:
|
|
* Convert the process layout 'layer' to the sorted position for the PU
|
|
*/
|
|
static int convert_layer_to_sort_idx(rmaps_lama_level_type_t layer);
|
|
|
|
/*
|
|
* MPPR Support:
|
|
* Check to make sure a process can be placed on this resource given the
|
|
* MPPR restrictions.
|
|
*/
|
|
static int rmaps_lama_check_mppr(orte_node_t *node,
|
|
hwloc_obj_t *child_obj);
|
|
static int rmaps_lama_iter_mppr_parents(orte_node_t *node,
|
|
hwloc_obj_t *child_obj,
|
|
bool check_only);
|
|
static int rmaps_lama_iter_mppr_children(orte_node_t *node,
|
|
hwloc_obj_t *child_obj,
|
|
bool check_only);
|
|
|
|
/*
|
|
* MPPR Support:
|
|
* Increment parents of this child to account for a process being placed
|
|
* on this resource.
|
|
*/
|
|
static int rmaps_lama_inc_mppr(orte_node_t *node,
|
|
hwloc_obj_t *child_obj);
|
|
|
|
/*
|
|
* Mapping Support:
|
|
* Return the native representation of the slot list
|
|
*/
|
|
static char * get_native_slot_list(orte_node_t *cur_node,
|
|
hwloc_obj_t *pu_obj,
|
|
int *put_idx_ref);
|
|
|
|
/*
|
|
* Ordering Support:
|
|
* Reorder sequentially
|
|
*/
|
|
static int rmaps_lama_ordering_sequential(orte_job_t *jdata);
|
|
|
|
/*
|
|
* Map a single process to a specific node
|
|
*/
|
|
static int orte_rmaps_lama_map_process(orte_job_t *jdata,
|
|
orte_node_t *node,
|
|
int app_idx,
|
|
orte_proc_t **proc);
|
|
|
|
/*********************************
|
|
* Main Module function to map a job
|
|
*********************************/
|
|
/*
 * Module entry point: map a job with the LAMA mapper.
 *
 * Returns ORTE_ERR_TAKE_NEXT_OPTION when another mapper was explicitly
 * requested for this job.  Otherwise the parameters are parsed and the job
 * is mapped; the result is ORTE_SUCCESS or the code of the step that failed.
 * The parsed layout / MPPR arrays are released before returning from any
 * path that passed the mapper check.
 */
static int orte_rmaps_lama_map(orte_job_t *jdata)
{
    int rc;
    const char *my_name = mca_rmaps_lama_component.base_version.mca_component_name;

    RMAPS_LAMA_CLEAR_TIMERS();
    RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_TOTAL);

    /*
     * Sanity check: a mapper has been requested, and it is not this one.
     */
    if( NULL != jdata->map->req_mapper &&
        0 != strcasecmp(jdata->map->req_mapper, my_name) ) {
        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:lama: job %s not using lama mapper (using %s)",
                            ORTE_JOBID_PRINT(jdata->jobid),
                            jdata->map->req_mapper);
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: Mapping job %s",
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* Record this component as the last mapper to handle the job */
    free(jdata->map->last_mapper);
    jdata->map->last_mapper = strdup(my_name);

    /* No processes mapped yet */
    jdata->num_procs = 0;

    /* Step 1: command line arguments */
    RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_PARSE_PARAMS);
    rc = orte_rmaps_lama_process_params(jdata);
    if( ORTE_SUCCESS != rc ) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_PARSE_PARAMS);

    /* Step 2: the mapping itself */
    rc = orte_rmaps_lama_map_core(jdata);
    if( ORTE_SUCCESS != rc ) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* Finished; timers are only closed out and shown on success */
    RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_TOTAL);
    RMAPS_LAMA_DISPLAY_TIMERS();

 cleanup:
    /* free(NULL) is a no-op, so no guards are needed */
    free(lama_mapping_layout);
    lama_mapping_layout = NULL;

    free(lama_mapping_layout_sort);
    lama_mapping_layout_sort = NULL;

    free(lama_mppr_levels);
    lama_mppr_levels = NULL;

    return rc;
}
|
|
|
|
|
|
/*********************************
|
|
* User defined lookup structure for hwloc topology
|
|
*********************************/
|
|
/*
 * Constructor: create and initialize the node_mppr pointer array of a new
 * rmaps_lama_hwloc_user_t.
 */
static void rmaps_lama_hwloc_user_construct(rmaps_lama_hwloc_user_t *item)
{
    opal_pointer_array_t *mppr = OBJ_NEW(opal_pointer_array_t);

    item->node_mppr = mppr;
    opal_pointer_array_init(mppr,
                            ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
                            ORTE_GLOBAL_ARRAY_MAX_SIZE,
                            ORTE_GLOBAL_ARRAY_BLOCK_SIZE);
}
|
|
|
|
/*
 * Destructor: release every non-NULL entry stored in node_mppr, then the
 * array itself.  Nothing is done when node_mppr is already NULL.
 */
static void rmaps_lama_hwloc_user_destruct(rmaps_lama_hwloc_user_t *item)
{
    orte_std_cntr_t slot;

    if( NULL == item->node_mppr ) {
        return;
    }

    for( slot = 0; slot < item->node_mppr->size; ++slot ) {
        if( NULL == item->node_mppr->addr[slot] ) {
            continue;
        }
        OBJ_RELEASE(item->node_mppr->addr[slot]);
        item->node_mppr->addr[slot] = NULL;
    }

    OBJ_RELEASE(item->node_mppr);
    item->node_mppr = NULL;
}
|
|
|
|
|
|
/*********************************
|
|
* Command line parameter parsing functions
|
|
*********************************/
|
|
/*
 * Parse the LAMA parameters for this job.
 *
 * The steps run in a fixed order: aliases, binding, mapping layout, MPPR,
 * ordering.  Each parser stores its result in the corresponding file/global
 * variable.  The first step that does not return ORTE_SUCCESS aborts the
 * function and its code is returned; otherwise ORTE_SUCCESS is returned.
 * At verbosity >= 10 the parsed values are echoed.
 */
static int orte_rmaps_lama_process_params(orte_job_t *jdata)
{
    int rc, idx;
    char *label = NULL;
    const char *order_name = NULL;

    /*
     * Aliases for map/bind/order/mppr
     */
    rc = rmaps_lama_process_alias_params(jdata);
    if( ORTE_SUCCESS != rc ) {
        opal_output(0, "mca:rmaps:lama: ERROR: Failed while processing aliases");
        return rc;
    }

    /*
     * Binding
     */
    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ---------------------------------");
    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ----- Binding : [%s]",
                        rmaps_lama_cmd_bind);
    rc = rmaps_lama_parse_binding(rmaps_lama_cmd_bind,
                                  &lama_binding_level,
                                  &lama_binding_num_levels);
    if( ORTE_SUCCESS != rc ) {
        opal_output(0, "mca:rmaps:lama: ERROR: Invalid Binding String: %s",
                    rmaps_lama_cmd_bind);
        return rc;
    }

    if( 10 <= opal_output_get_verbosity(orte_rmaps_base.rmaps_output) ) {
        label = lama_type_enum_to_str(lama_binding_level);
        opal_output_verbose(10, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:lama: ----- Binding : %*d x %10s",
                            MAX_BIND_DIGIT_LEN, lama_binding_num_levels, label);
        free(label);
        label = NULL;
    }
    /* The binding is applied by this component, so clear the generic policy */
    OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE);

    /*
     * Mapping (process layout string)
     */
    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ---------------------------------");
    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ----- Mapping : [%s]",
                        rmaps_lama_cmd_map);
    rc = rmaps_lama_parse_mapping(rmaps_lama_cmd_map,
                                  &lama_mapping_layout,
                                  &lama_mapping_layout_sort,
                                  &lama_mapping_num_layouts);
    if( ORTE_SUCCESS != rc ) {
        opal_output(0, "mca:rmaps:lama: ERROR: Invalid Mapping Process Layout: %s",
                    rmaps_lama_cmd_map);
        return rc;
    }

    if( 10 <= opal_output_get_verbosity(orte_rmaps_base.rmaps_output) ) {
        for( idx = 0; idx < lama_mapping_num_layouts; ++idx ) {
            label = lama_type_enum_to_str(lama_mapping_layout[idx]);
            opal_output_verbose(10, orte_rmaps_base.rmaps_output,
                                "mca:rmaps:lama: ----- Mapping : (%d) %10s (%d vs %d)",
                                idx, label,
                                lama_mapping_layout[idx], lama_mapping_layout_sort[idx]);
            free(label);
            label = NULL;
        }
    }

    /*
     * MPPR
     */
    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ---------------------------------");
    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ----- MPPR : [%s]",
                        rmaps_lama_cmd_mppr);
    rc = rmaps_lama_parse_mppr(rmaps_lama_cmd_mppr,
                               &lama_mppr_levels,
                               &lama_mppr_num_levels);
    if( ORTE_SUCCESS != rc ) {
        opal_output(0, "mca:rmaps:lama: ERROR: Invalid MPPR: %s",
                    rmaps_lama_cmd_mppr);
        return rc;
    }

    if( 10 <= opal_output_get_verbosity(orte_rmaps_base.rmaps_output) ) {
        for( idx = 0; idx < lama_mppr_num_levels; ++idx ) {
            label = lama_type_enum_to_str(lama_mppr_levels[idx].type);
            opal_output_verbose(10, orte_rmaps_base.rmaps_output,
                                "mca:rmaps:lama: ----- MPPR : %*d at %10s",
                                MAX_BIND_DIGIT_LEN, lama_mppr_levels[idx].max_resources, label);
            free(label);
            label = NULL;
        }
    }

    /*
     * Ordering
     */
    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ---------------------------------");
    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ----- Ordering : [%s]",
                        rmaps_lama_cmd_ordering);
    rc = rmaps_lama_parse_ordering(rmaps_lama_cmd_ordering,
                                   &lama_ordering);
    if( ORTE_SUCCESS != rc ) {
        opal_output(0, "mca:rmaps:lama: ERROR: Invalid Ordering Argument: %s",
                    rmaps_lama_cmd_ordering);
        return rc;
    }

    if( 10 <= opal_output_get_verbosity(orte_rmaps_base.rmaps_output) ) {
        if( LAMA_ORDER_NATURAL == lama_ordering ) {
            order_name = "Natural";
        } else if( LAMA_ORDER_SEQ == lama_ordering ) {
            order_name = "Sequential";
        } else {
            order_name = "Unknown";
        }
        label = strdup(order_name);
        opal_output_verbose(10, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:lama: ----- Ordering : %10s",
                            label);
        free(label);
        label = NULL;
    }

    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ---------------------------------");

    return ORTE_SUCCESS;
}
|
|
|
|
|
|
/*********************************
|
|
* Support functions
|
|
*********************************/
|
|
rmaps_lama_level_type_t lama_type_str_to_enum(char *param)
|
|
{
|
|
if( 0 == strncmp(param, "n", strlen("n")) ) {
|
|
return LAMA_LEVEL_MACHINE;
|
|
}
|
|
else if( 0 == strncmp(param, "b", strlen("b")) ) {
|
|
return LAMA_LEVEL_BOARD;
|
|
}
|
|
else if( 0 == strncmp(param, "s", strlen("s")) ) {
|
|
return LAMA_LEVEL_SOCKET;
|
|
}
|
|
else if( 0 == strncmp(param, "c", strlen("c")) ) {
|
|
return LAMA_LEVEL_CORE;
|
|
}
|
|
else if( 0 == strncmp(param, "h", strlen("h")) ) {
|
|
return LAMA_LEVEL_PU;
|
|
}
|
|
else if( 0 == strncmp(param, "L1", strlen("L1")) ) {
|
|
return LAMA_LEVEL_CACHE_L1;
|
|
}
|
|
else if( 0 == strncmp(param, "L2", strlen("L2")) ) {
|
|
return LAMA_LEVEL_CACHE_L2;
|
|
}
|
|
else if( 0 == strncmp(param, "L3", strlen("L3")) ) {
|
|
return LAMA_LEVEL_CACHE_L3;
|
|
}
|
|
else if( 0 == strncmp(param, "N", strlen("N")) ) {
|
|
return LAMA_LEVEL_NUMA;
|
|
}
|
|
|
|
return LAMA_LEVEL_UNKNOWN;
|
|
}
|
|
|
|
/*
 * Return a human readable name for a level.
 *
 * The result is always a fresh strdup() copy ("Unknown" for any value not
 * listed below); the caller is responsible for free()ing it.
 */
char * lama_type_enum_to_str(rmaps_lama_level_type_t param)
{
    const char *label;

    switch( param ) {
    case LAMA_LEVEL_MACHINE:
        label = "Machine";
        break;
    case LAMA_LEVEL_BOARD:
        label = "Board";
        break;
    case LAMA_LEVEL_SOCKET:
        label = "Socket";
        break;
    case LAMA_LEVEL_CORE:
        label = "Core";
        break;
    case LAMA_LEVEL_PU:
        label = "Hw. Thread";
        break;
    case LAMA_LEVEL_CACHE_L1:
        label = "L1 Cache";
        break;
    case LAMA_LEVEL_CACHE_L2:
        label = "L2 Cache";
        break;
    case LAMA_LEVEL_CACHE_L3:
        label = "L3 Cache";
        break;
    case LAMA_LEVEL_NUMA:
        label = "NUMA";
        break;
    default:
        label = "Unknown";
        break;
    }

    return strdup(label);
}
|
|
|
|
/*********************************
|
|
* Core Mapper function
|
|
*********************************/
|
|
/*
 * Map every app context of the job.
 *
 * For each non-NULL app context this function obtains the target node list,
 * builds the max tree and then calls rmaps_lama_map_core_iter_level() until
 * cur_app_context->num_procs processes have been mapped (num_procs is set to
 * the slot count when it is 0).  After all app contexts are mapped, the
 * sequential re-ordering is applied when requested and, at verbosity >= 10,
 * the placement of every process is printed.
 *
 * Returns:
 *   ORTE_SUCCESS              - everything was mapped
 *   ORTE_ERR_OUT_OF_RESOURCE  - the PU index arrays could not be allocated
 *   ORTE_ERROR                - oversubscription was required but is not
 *                               allowed, or a pass made no progress
 *   other                     - error code reported by a failing helper
 *
 * All memory and objects acquired here are released on every exit path.
 */
static int orte_rmaps_lama_map_core(orte_job_t *jdata)
{
    int ret, exit_status = ORTE_SUCCESS;
    int cur_app_idx = 0;
    int num_slots;
    orte_app_context_t *cur_app_context = NULL;
    orte_node_t *cur_mach = NULL;
    orte_proc_t *proc = NULL;
    opal_list_t *node_list = NULL;
    opal_list_item_t *item = NULL;
    opal_tree_t *max_tree = NULL;
    int *pu_idx_ref = NULL;
    int *last_pu_idx_ref = NULL;
    int i, num_mapped, last_num_mapped, mach_level = -1;
    orte_std_cntr_t j;
    int max_procs_to_map;
    int iter_passes;
    char * last_level_str = NULL;
    bool initial_map = true;

    /*
     * Setup PU reference
     * Find the position of the 'machine'
     */
    pu_idx_ref      = (int*)malloc(sizeof(int) * lama_mapping_num_layouts);
    last_pu_idx_ref = (int*)malloc(sizeof(int) * lama_mapping_num_layouts);
    if( NULL == pu_idx_ref || NULL == last_pu_idx_ref ) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        exit_status = ORTE_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }

    for( i = 0; i < lama_mapping_num_layouts; ++i ) {
        pu_idx_ref[i] = 0;
        /* -1 marks "nothing mapped yet", i.e. no bookmark to resume from */
        last_pu_idx_ref[i] = -1;
        if( LAMA_LEVEL_MACHINE == lama_mapping_layout[i] ) {
            mach_level = i;
        }
    }

    /*
     * Foreach app context
     */
    for(cur_app_idx = 0; cur_app_idx < jdata->apps->size; ++cur_app_idx ) {
        if( NULL == (cur_app_context = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, cur_app_idx))) {
            continue;
        }

        /*
         * Get the list of nodes for this app_context.
         */
        node_list = OBJ_NEW(opal_list_t);
        ret = orte_rmaps_base_get_target_nodes(node_list,
                                               &num_slots,
                                               cur_app_context,
                                               jdata->map->mapping,
                                               initial_map, false);
        if(ORTE_SUCCESS != ret ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
        /* Flag that all subsequent requests should not reset the node->mapped flag */
        initial_map = false;

        /*
         * If a bookmark exists from some prior mapping, then start from there
         */
        cur_mach = (orte_node_t*)orte_rmaps_base_get_starting_point(node_list, jdata);

        /*
         * If the application did not specify the number of procs
         * then set it to the number of 'slots'
         * JJH: TODO: Revisit 'max_procs' calculation
         */
        if (0 == cur_app_context->num_procs) {
            cur_app_context->num_procs = num_slots;
        }
        max_procs_to_map = cur_app_context->num_procs;

        /*
         * Build the Max Tree
         */
        RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_BUILD_MAX_TREE);
        max_tree = rmaps_lama_create_empty_max_tree();
        if( ORTE_SUCCESS != (ret = rmaps_lama_build_max_tree(jdata, node_list,
                                                             max_tree,
                                                             &lama_mppr_max_tree_homogeneous_system)) ) {
            exit_status = ret;
            goto cleanup;
        }
        RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_BUILD_MAX_TREE);

        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:lama: Mapping: -----------------------");
        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:lama: ---------------------------------");
        RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_MAPPING);

        /*
         * Clear PU reference
         */
        for( i = 0; i < lama_mapping_num_layouts; ++i ) {
            pu_idx_ref[i] = 0;
        }

        /*
         * Mapping: Recursively loop over all levels
         *
         * The recursion advances the current machine through a pointer.
         * cur_mach is not read again in this iteration, so its address is
         * passed directly instead of a separately allocated cell.
         */
        num_mapped = 0;
        last_num_mapped = 0;
        iter_passes = 0;
        while( max_procs_to_map > num_mapped ) {
            ret = rmaps_lama_map_core_iter_level(jdata,
                                                 cur_app_context,
                                                 node_list,
                                                 &cur_mach,
                                                 max_tree,
                                                 lama_mapping_num_layouts-1,
                                                 mach_level,
                                                 &pu_idx_ref,
                                                 &last_pu_idx_ref,
                                                 &num_mapped,
                                                 max_procs_to_map,
                                                 &iter_passes);
            if( ORTE_SUCCESS != ret ) {
                ORTE_ERROR_LOG(ret);
                exit_status = ret;
                goto cleanup;
            }

            /*
             * We only get here (without finishing the mapping) if we are going to
             * start oversubscribing resources.
             */
            if( max_procs_to_map > num_mapped ) {
                if( !rmaps_lama_can_oversubscribe ) {
                    orte_show_help("help-orte-rmaps-lama.txt",
                                   "orte-rmaps-lama:oversubscribe",
                                   true,
                                   num_mapped, max_procs_to_map);
                    exit_status = ORTE_ERROR;
                    goto cleanup;
                } else {
                    rmaps_lama_am_oversubscribing = true;
                }
            }

            /*
             * Check to see if we have made any progress in the mapping loop
             */
            if( 0 < cur_app_idx && 2 == iter_passes ) {
                /*
                 * Give it another pass:
                 * This is an edge case when we are trying to restart from a
                 * bookmark left by a previous app context. If this app context
                 * is starting from exactly the beginning of the allocation
                 * then the recursive loop could return out here after the
                 * increment pass. This is indicated by (iter_passes = 2).
                 * Since no processes were mapped, we just try again.
                 */
            }
            else if( last_num_mapped == num_mapped ) {
                orte_show_help("help-orte-rmaps-lama.txt",
                               "orte-rmaps-lama:no-resources-available",
                               true,
                               cur_app_idx,
                               num_mapped, max_procs_to_map,
                               (NULL == rmaps_lama_cmd_map      ? "[Not Provided]" : rmaps_lama_cmd_map),
                               (NULL == rmaps_lama_cmd_bind     ? "[Not Provided]" : rmaps_lama_cmd_bind),
                               (NULL == rmaps_lama_cmd_mppr     ? "[Not Provided]" : rmaps_lama_cmd_mppr),
                               (NULL == rmaps_lama_cmd_ordering ? "[Not Provided]" : rmaps_lama_cmd_ordering));
                exit_status = ORTE_ERROR;
                goto cleanup;
            } else {
                last_num_mapped = num_mapped;
            }
        }

        /*
         * Display Bookmark for debugging
         * (the bookmark may still be unset if this app context mapped
         * nothing, so do not dereference it blindly)
         */
        last_level_str = pu_ref_to_str(last_pu_idx_ref, lama_mapping_num_layouts);
        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:lama: Bookmark: --> Node %10s PU %10s",
                            (NULL == jdata->bookmark ? "(NULL)" : jdata->bookmark->name),
                            last_level_str);
        free(last_level_str);
        last_level_str = NULL;

        /*
         * Cleanup for next iteration
         */
        if( NULL != node_list ) {
            while(NULL != (item = opal_list_remove_first(node_list))) {
                OBJ_RELEASE(item);
            }
            OBJ_RELEASE(node_list);
            node_list = NULL;
        }

        OBJ_RELEASE(max_tree);
        max_tree = NULL;
    }

    RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_MAPPING);

    /*
     * Ordering
     */
    RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_ORDERING);
    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ---------------------------------");
    if( LAMA_ORDER_SEQ == lama_ordering ) {
        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:lama: Ordering: Sequential ------------");

        if( ORTE_SUCCESS != (ret = rmaps_lama_ordering_sequential(jdata)) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }
    else {
        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:lama: Ordering: Natural ---------------");
        /*
         * We compute our own vpids inline with the algorithm. So no need to use the
         * orte_rmaps_base_compute_vpids() function.
         */
    }
    RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_ORDERING);

    /*
     * Display Mapping
     */
    if( 10 <= opal_output_get_verbosity(orte_rmaps_base.rmaps_output) ) {
        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:lama: ---------------------------------");
        for( j = 0; j < jdata->procs->size; ++j) {
            if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) {
                continue;
            }

            opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                                "mca:rmaps:lama: Ordering: Proc. %2d on Node %10s - Slot %s",
                                proc->name.vpid, proc->node->name, proc->cpu_bitmap);
        }
    }

    /*
     * All done
     */
    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: Finished ------------------------");
    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ---------------------------------");

 cleanup:
    if( NULL != node_list ) {
        while(NULL != (item = opal_list_remove_first(node_list))) {
            OBJ_RELEASE(item);
        }
        OBJ_RELEASE(node_list);
        node_list = NULL;
    }

    if( NULL != max_tree ) {
        OBJ_RELEASE(max_tree);
        max_tree = NULL;
    }

    /* free(NULL) is a no-op, so these are safe on every path */
    free(pu_idx_ref);
    pu_idx_ref = NULL;

    free(last_pu_idx_ref);
    last_pu_idx_ref = NULL;

    free(last_level_str);
    last_level_str = NULL;

    return exit_status;
}
|
|
|
|
static int rmaps_lama_map_core_iter_level(orte_job_t *jdata,
|
|
orte_app_context_t *cur_app_context,
|
|
opal_list_t *node_list,
|
|
orte_node_t **cur_mach_ptr,
|
|
opal_tree_t *max_tree,
|
|
int cur_level,
|
|
int mach_level,
|
|
int **pu_idx_ref,
|
|
int **last_pu_idx_ref,
|
|
int *num_mapped,
|
|
int max_procs,
|
|
int *iter_passes)
|
|
{
|
|
int ret, exit_status = ORTE_SUCCESS;
|
|
int i, j;
|
|
opal_tree_item_t *tree_for_level = NULL;
|
|
int max_subtree_arity = 0;
|
|
char * level_str = NULL;
|
|
char * last_level_str = NULL;
|
|
char * slot_list = NULL;
|
|
orte_proc_t *proc = NULL;
|
|
int pu_idx = 0;
|
|
|
|
/*
|
|
* Find the current tree for this level
|
|
* If it is the machine level, then we need to access the information from
|
|
* the node list, not the max_tree.
|
|
*/
|
|
if( cur_level != mach_level ) {
|
|
tree_for_level = opal_tree_find_with(opal_tree_get_root(max_tree),
|
|
&lama_mapping_layout[cur_level]);
|
|
/*
|
|
* We do not need subtree, but the arity of the subtree
|
|
* JJH TODO: This should be an opal_tree function.
|
|
*/
|
|
max_subtree_arity = 1; /* include self */
|
|
while( NULL != (tree_for_level = opal_tree_get_next_sibling(tree_for_level)) ) {
|
|
++max_subtree_arity;
|
|
}
|
|
}
|
|
else if( NULL == *cur_mach_ptr ) {
|
|
*cur_mach_ptr = get_next_machine(jdata, node_list, (opal_list_item_t*)(*cur_mach_ptr));
|
|
}
|
|
|
|
pu_idx = convert_layer_to_sort_idx(lama_mapping_layout[cur_level]);
|
|
level_str = lama_type_enum_to_str(lama_mapping_layout[cur_level]);
|
|
|
|
/*
|
|
* Do we need to advance to a bookmark
|
|
*/
|
|
if( (*last_pu_idx_ref)[0] >= 0 && 0 == *iter_passes ) {
|
|
/*
|
|
* Display last mapped
|
|
*/
|
|
last_level_str = pu_ref_to_str(*last_pu_idx_ref, lama_mapping_num_layouts);
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Bookmark: --> Last Mapped: Node %10s (bkmrk %10s) PU %10s - Level %2d",
|
|
(NULL == *cur_mach_ptr ? "(NULL)" : (*cur_mach_ptr)->name),
|
|
jdata->bookmark->name, last_level_str, (*last_pu_idx_ref)[pu_idx]);
|
|
free(last_level_str);
|
|
last_level_str = NULL;
|
|
|
|
/*
|
|
* Set the level starting point to the last known index
|
|
*/
|
|
i = (*last_pu_idx_ref)[pu_idx];
|
|
} else {
|
|
i = 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Loop over all siblings at this level
|
|
* Initial condition above, Increment at bottom, Break check at bottom
|
|
*/
|
|
while( 1 ) {
|
|
/*
|
|
* Define the PU index
|
|
*/
|
|
(*pu_idx_ref)[pu_idx] = i;
|
|
|
|
if( (*last_pu_idx_ref)[0] >= 0 && 0 == *iter_passes ) {
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Mapping: --> Level %2d: %10s (%2d) - I %2d - Arity %2d - %10s - Increment only",
|
|
cur_level+1,
|
|
level_str, pu_idx, i, max_subtree_arity,
|
|
(NULL == *cur_mach_ptr ? "" : (*cur_mach_ptr)->name));
|
|
} else {
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Mapping: --> Level %2d: %10s (%2d) - I %2d - Arity %2d - %10s",
|
|
cur_level+1,
|
|
level_str, pu_idx, i, max_subtree_arity,
|
|
(NULL == *cur_mach_ptr ? "" : (*cur_mach_ptr)->name));
|
|
}
|
|
|
|
|
|
/*
|
|
* If not the inner most loop, iterate to the next level down
|
|
*/
|
|
if( cur_level > 0 ) {
|
|
ret = rmaps_lama_map_core_iter_level(jdata,
|
|
cur_app_context,
|
|
node_list,
|
|
cur_mach_ptr,
|
|
max_tree,
|
|
cur_level - 1,
|
|
mach_level,
|
|
pu_idx_ref,
|
|
last_pu_idx_ref,
|
|
num_mapped,
|
|
max_procs,
|
|
iter_passes);
|
|
if( ORTE_SUCCESS != ret ) {
|
|
ORTE_ERROR_LOG(ret);
|
|
exit_status = ret;
|
|
goto cleanup;
|
|
}
|
|
}
|
|
/*
|
|
* If we are restarting the iteration from a previous bookmark then
|
|
* the first pass through is a no-op mapping pass that just increments
|
|
* the PU reference.
|
|
* Called by innermost loop
|
|
*/
|
|
else if( (*last_pu_idx_ref)[0] >= 0 && 0 == *iter_passes ) {
|
|
*iter_passes += 1;
|
|
}
|
|
/*
|
|
* Try to map at this location
|
|
*/
|
|
else {
|
|
/*
|
|
* On first pass, make sure we increment this, just so we do not
|
|
* accidentally think this is an increment pass.
|
|
*/
|
|
if( 0 == *iter_passes ) {
|
|
*iter_passes += 1;
|
|
}
|
|
|
|
/*
|
|
* Display the PU ref for debugging
|
|
*/
|
|
display_pu_ref(*pu_idx_ref, lama_mapping_num_layouts, *num_mapped, proc);
|
|
|
|
|
|
/*
|
|
* Check to see if this resource is available on this node.
|
|
*
|
|
* In a heterogeneous or otherwise non-uniformly restricted
|
|
* environment we may iterate to a resource that is not
|
|
* available either because it does not exist, or is not
|
|
* available for allocation (off-lined, sub-node allocation).
|
|
* Additionally, we need to check resource constrains expressed
|
|
* in the MPPR and binding.
|
|
*/
|
|
ret = check_node_availability((*cur_mach_ptr),
|
|
max_tree,
|
|
*pu_idx_ref,
|
|
&slot_list);
|
|
if( ORTE_SUCCESS != ret || NULL == slot_list ) {
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:hwtopo: Mapping: --> Level %2d: %s - INVALID/SKIP",
|
|
cur_level+1,
|
|
level_str);
|
|
/*
|
|
* By not mapping here we just let the iterations continue
|
|
* until a suitable match is found or we have exhausted all
|
|
* possible locations to match and thus cannot map any more.
|
|
*/
|
|
}
|
|
else {
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Mapping: --> Level %2d: %s - Slot List (%s)",
|
|
cur_level+1,
|
|
level_str, slot_list);
|
|
|
|
/*
|
|
* Map this process onto the resource specified
|
|
* level_tree_objs[*] and cur_mach point to the specific resource
|
|
*/
|
|
proc = NULL;
|
|
ret = orte_rmaps_lama_map_process(jdata,
|
|
(*cur_mach_ptr),
|
|
cur_app_context->idx,
|
|
&proc);
|
|
if( ORTE_SUCCESS != ret ) {
|
|
ORTE_ERROR_LOG(ret);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Set the binding for this process
|
|
*/
|
|
proc->cpu_bitmap = strdup(slot_list);
|
|
/** JJH: Need to associate with an HWLOC object... hummm.... */
|
|
proc->locale = NULL;
|
|
/* proc->locale = obj; */
|
|
|
|
/*
|
|
* Insert the proc into the 'native' ordering location.
|
|
*/
|
|
proc->name.vpid = jdata->num_procs;
|
|
if (ORTE_SUCCESS != (ret = opal_pointer_array_set_item(jdata->procs,
|
|
proc->name.vpid, proc))) {
|
|
ORTE_ERROR_LOG(ret);
|
|
exit_status = ret;
|
|
goto cleanup;
|
|
}
|
|
jdata->num_procs += 1;
|
|
|
|
/*
|
|
* Save a bookmark so we can return here later if necessary
|
|
*/
|
|
for( j = 0; j < lama_mapping_num_layouts; ++j ) {
|
|
(*last_pu_idx_ref)[j] = (*pu_idx_ref)[j];
|
|
}
|
|
jdata->bookmark = (orte_node_t*)(*cur_mach_ptr);
|
|
|
|
(*num_mapped)++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Increment loop
|
|
*
|
|
* If we are binding, then we may need to advance the binding layer
|
|
* by more than one.
|
|
*/
|
|
if( cur_level != mach_level ) {
|
|
if( lama_binding_level == lama_mapping_layout[cur_level] ) {
|
|
i += lama_binding_num_levels;
|
|
} else {
|
|
++i;
|
|
}
|
|
} else {
|
|
/*
|
|
* Note: Currently we do not allow for 'binding' to multiple machines
|
|
* But keep the code just in case we want to play with 'stride' later
|
|
*/
|
|
if( lama_binding_level == lama_mapping_layout[cur_level] && lama_binding_num_levels > 1) {
|
|
opal_output(0, "mca:rmaps:lama: ERROR: Cannot bind to multiple machines - SHOULD NEVER HAPPEN: %s",
|
|
rmaps_lama_cmd_bind);
|
|
return ORTE_ERROR;
|
|
#if 0
|
|
for( j = 0; j < lama_binding_num_levels; ++j ) {
|
|
cur_mach = get_next_machine(jdata, node_list, (opal_list_item_t*)cur_mach);
|
|
if( NULL == cur_mach ) {
|
|
break;
|
|
}
|
|
++i;
|
|
}
|
|
#endif
|
|
} else {
|
|
*cur_mach_ptr = get_next_machine(jdata, node_list, (opal_list_item_t*)(*cur_mach_ptr));
|
|
++i;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Check if we are done mapping before iterating again
|
|
*/
|
|
if( max_procs <= *num_mapped ) {
|
|
exit_status = ORTE_SUCCESS;
|
|
goto cleanup;
|
|
}
|
|
|
|
/*
|
|
* Check if we are done looping
|
|
*/
|
|
if( cur_level != mach_level ) {
|
|
if( i >= max_subtree_arity ) {
|
|
break;
|
|
}
|
|
} else {
|
|
if( NULL == *cur_mach_ptr ) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* Sanity Check: Check if we are done mapping
|
|
*/
|
|
if( max_procs <= *num_mapped ) {
|
|
exit_status = ORTE_SUCCESS;
|
|
goto cleanup;
|
|
}
|
|
|
|
cleanup:
|
|
/*
|
|
* If the outermost layer, the increment the number of iteration passes.
|
|
*/
|
|
if( cur_level == lama_mapping_num_layouts-1 ) {
|
|
*iter_passes += 1;
|
|
}
|
|
|
|
if( NULL != level_str ) {
|
|
free(level_str);
|
|
level_str = NULL;
|
|
}
|
|
|
|
if( NULL != slot_list ) {
|
|
free(slot_list);
|
|
slot_list = NULL;
|
|
}
|
|
|
|
return exit_status;
|
|
}
|
|
|
|
static orte_node_t* get_next_machine(orte_job_t *jdata, opal_list_t *node_list,
|
|
opal_list_item_t *cur_mach)
|
|
{
|
|
orte_node_t *next_mach = NULL;
|
|
|
|
if( NULL == cur_mach ) {
|
|
next_mach = (orte_node_t*)opal_list_get_first(node_list);
|
|
}
|
|
else if( opal_list_get_last(node_list) == cur_mach ) {
|
|
next_mach = NULL;
|
|
}
|
|
else {
|
|
next_mach = (orte_node_t*)opal_list_get_next(cur_mach);
|
|
}
|
|
|
|
return next_mach;
|
|
}
|
|
|
|
static int orte_rmaps_lama_map_process(orte_job_t *jdata,
|
|
orte_node_t *node,
|
|
int app_idx,
|
|
orte_proc_t **proc)
|
|
{
|
|
int ret;
|
|
|
|
/*
|
|
* Add this node to the map, but only once
|
|
*/
|
|
if( !node->mapped ) {
|
|
if (ORTE_SUCCESS > (ret = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
|
|
ORTE_ERROR_LOG(ret);
|
|
return ret;
|
|
}
|
|
node->mapped = true;
|
|
OBJ_RETAIN(node); /* maintain accounting on object */
|
|
++(jdata->map->num_nodes);
|
|
}
|
|
|
|
/*
|
|
* Setup the process object
|
|
*/
|
|
if (NULL == (*proc = orte_rmaps_base_setup_proc(jdata, node, app_idx))) {
|
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
/*
 * Renumber the job's processes sequentially.
 *
 * Pass 1 walks the mapped nodes in map order and, within each node, its
 * processes in array order; every process belonging to this job receives
 * the next vpid, starting from 0.
 *
 * Pass 2 reorders jdata->procs so that each process sits at the index
 * equal to its new vpid.  Without this the remote daemons assign the
 * incorrect ranks, since they use the relative ordering in jdata->procs
 * to determine vpids locally.
 *
 * JJH: Look at combining these loops with the loop in the core so the
 * JJH: list does not have to be iterated twice.
 *
 * Always returns ORTE_SUCCESS.
 */
static int rmaps_lama_ordering_sequential(orte_job_t *jdata)
{
    orte_job_map_t *job_map = jdata->map;
    orte_node_t *node = NULL;
    orte_proc_t *proc = NULL, *displaced = NULL;
    orte_std_cntr_t node_idx, proc_idx;
    orte_vpid_t target;
    int next_rank = 0;

    opal_output_verbose(15, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ---------------------------------");

    /*
     * Pass 1: hand out vpids in node order
     */
    for( node_idx = 0; node_idx < job_map->nodes->size; ++node_idx ) {
        node = (orte_node_t*)opal_pointer_array_get_item(job_map->nodes, node_idx);
        if( NULL == node ) {
            continue;
        }

        for( proc_idx = 0; proc_idx < node->procs->size; ++proc_idx ) {
            proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, proc_idx);
            /* skip empty slots and procs from other jobs */
            if( NULL == proc || proc->name.jobid != jdata->jobid ) {
                continue;
            }

            opal_output_verbose(15, orte_rmaps_base.rmaps_output,
                                "mca:rmaps:lama: Ordering: Rename Proc. %2d to %2d (Rev. %s)",
                                proc->name.vpid, next_rank, proc->node->name);
            proc->name.vpid = next_rank;
            ++next_rank;
        }
    }

    /*
     * Pass 2: move every proc to the slot matching its new vpid
     */
    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: ---------------------------------");
    next_rank = 0;
    for( proc_idx = 0; proc_idx < jdata->procs->size; ++proc_idx ) {
        proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc_idx);
        if( NULL == proc ) {
            continue;
        }

        opal_output_verbose(15, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:lama: Ordering: Proc. %2d on Node %s",
                            proc->name.vpid, proc->node->name);

        /*
         * Keep exchanging until the proc held for slot 'next_rank'
         * actually carries that vpid: put the held proc where it belongs
         * and pick up whatever was sitting there.
         */
        while( next_rank != (int)proc->name.vpid ) {
            target = proc->name.vpid;
            displaced = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, target);

            opal_pointer_array_set_item(jdata->procs, target, proc);
            opal_pointer_array_set_item(jdata->procs, next_rank, displaced);

            opal_output_verbose(15, orte_rmaps_base.rmaps_output,
                                "mca:rmaps:lama: Ordering: \t SWAP Proc. %2d (%d) and Proc. %2d (%d)",
                                target, next_rank, displaced->name.vpid, target);
            proc = displaced;
        }
        ++next_rank;
    }

    return ORTE_SUCCESS;
}
|
|
|
|
/*
 * Find the position of 'layer' inside lama_mapping_layout_sort[].
 *
 * Returns the index of the first matching entry among the first
 * lama_mapping_num_layouts entries.  When nothing matches, 0 is returned,
 * so the result alone cannot distinguish "not found" from "found at 0".
 */
static int convert_layer_to_sort_idx(rmaps_lama_level_type_t layer)
{
    int pos = 0;

    while( pos < lama_mapping_num_layouts &&
           layer != lama_mapping_layout_sort[pos] ) {
        ++pos;
    }

    return (pos < lama_mapping_num_layouts) ? pos : 0;
}
|
|
|
|
/*
 * Emit (at verbosity 5) one line describing a PU reference vector,
 * the rank it is being considered for, and the process name.
 *
 * 'ref' / 'size' are rendered through pu_ref_to_str(); the temporary
 * string is released before returning.  A NULL 'proc' is printed as
 * "(null)".
 */
static void display_pu_ref(int *ref, int size, int rank, orte_proc_t *proc)
{
    char *ref_text = pu_ref_to_str(ref, size);
    const char *proc_text = "(null)";

    if( NULL != proc ) {
        proc_text = ORTE_NAME_PRINT(&proc->name);
    }

    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                        "mca:rmaps:lama: Mapping: PU Ref: %s [Rank %2d] Name: %s",
                        ref_text, rank, proc_text);

    free(ref_text);
}
|
|
|
|
/*
 * Render an array of integers as one string, each entry formatted
 * with "%2d" and no separator (e.g. {1, 2, 3} -> " 1 2 3").
 *
 * ref   Array of 'size' integers (may be NULL, treated as empty).
 * size  Number of entries in 'ref' (values <= 0 are treated as empty).
 *
 * Returns a NUL-terminated, malloc'ed string that the caller must
 * free(), or NULL if the allocation fails.
 *
 * The buffer is sized for the widest text "%2d" can produce for any
 * int plus the terminator, so neither the trailing NUL nor entries
 * wider than two characters can run past the allocation.
 */
static char * pu_ref_to_str(int *ref, int size)
{
    /* Generous upper bound on the characters "%2d" emits for one int:
     * at most 3 decimal digits per byte, plus a sign. */
    enum { PU_REF_FIELD_MAX = 3 * sizeof(int) + 2 };
    size_t capacity;
    size_t used = 0;
    int count;
    int i, written;
    char *str = NULL;

    count = (NULL == ref || size < 0) ? 0 : size;

    capacity = (size_t)count * PU_REF_FIELD_MAX + 1;
    str = (char *)malloc(capacity);
    if( NULL == str ) {
        return NULL;
    }
    str[0] = '\0';

    for( i = 0; i < count; ++i ) {
        written = snprintf(str + used, capacity - used, "%2d", ref[i]);
        /* Cannot happen with the bound above; stop rather than overrun */
        if( written < 0 || (size_t)written >= capacity - used ) {
            break;
        }
        used += (size_t)written;
    }

    return str;
}
|
|
|
|
static int check_node_availability(orte_node_t *cur_node,
|
|
opal_tree_t *max_tree,
|
|
int *pu_idx_ref,
|
|
char **slot_list)
|
|
{
|
|
int exit_status = ORTE_SUCCESS;
|
|
int i;
|
|
char * level_str = NULL;
|
|
hwloc_obj_t *topo_child = NULL, *topo_parent=NULL;
|
|
|
|
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Checking: Node (%s) -------------",
|
|
cur_node->name);
|
|
opal_output_verbose(11, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: ---------------------------------");
|
|
|
|
|
|
/*
|
|
* Determine if the current node has the necessary hardware
|
|
* as described by the PU index.
|
|
* Find the hwloc object reference for the resource pointed to
|
|
* by the PU index.
|
|
* JJH TODO: If homogeneous system then this could be simplified.
|
|
*/
|
|
topo_parent = (hwloc_obj_t*)malloc(sizeof(hwloc_obj_t) * 1);
|
|
*topo_parent = hwloc_get_obj_by_depth(cur_node->topology, 0, 0);
|
|
for( i = 0; i < lama_mapping_num_layouts; ++i ) {
|
|
/*
|
|
* Skip 'machine' level
|
|
*/
|
|
if( LAMA_LEVEL_MACHINE == lama_mapping_layout_sort[i] ) {
|
|
continue;
|
|
}
|
|
/*
|
|
* Skip 'board' level
|
|
* JJH: HWLOC does not support BOARD at the moment
|
|
*/
|
|
if( LAMA_LEVEL_BOARD == lama_mapping_layout_sort[i] ) {
|
|
continue;
|
|
}
|
|
|
|
level_str = lama_type_enum_to_str(lama_mapping_layout_sort[i]);
|
|
opal_output_verbose(11, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Checking: %2d of %s",
|
|
pu_idx_ref[i], level_str);
|
|
|
|
/*
|
|
* Find the nth subtree matching the current key
|
|
*/
|
|
topo_child = rmaps_lama_find_nth_subtree_match(cur_node->topology,
|
|
*topo_parent,
|
|
pu_idx_ref[i],
|
|
lama_mapping_layout_sort[i]);
|
|
|
|
/*
|
|
* If it does not exist, then this node is not capable of matching
|
|
* so it is unavailable.
|
|
*/
|
|
if( NULL == topo_child ) {
|
|
opal_output_verbose(11, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Check failed: Node %s does not have a %10s %2d",
|
|
cur_node->name, level_str, pu_idx_ref[i]);
|
|
exit_status = ORTE_ERROR;
|
|
goto cleanup;
|
|
}
|
|
|
|
/*
|
|
* Keep decending the tree
|
|
*/
|
|
topo_parent = topo_child;
|
|
free(level_str);
|
|
level_str = NULL;
|
|
}
|
|
|
|
/*
|
|
* We have sufficient hardware :)
|
|
*/
|
|
|
|
|
|
/*
|
|
* Return the native slot list to bind to
|
|
* Internally checks the MPPR
|
|
*/
|
|
*slot_list = get_native_slot_list(cur_node, topo_parent, pu_idx_ref);
|
|
if( NULL == *slot_list ) {
|
|
goto cleanup;
|
|
}
|
|
|
|
cleanup:
|
|
if( NULL != level_str ) {
|
|
free(level_str);
|
|
level_str = NULL;
|
|
}
|
|
|
|
if( ORTE_SUCCESS != exit_status ) {
|
|
if( NULL != *slot_list ) {
|
|
free(*slot_list);
|
|
*slot_list = NULL;
|
|
}
|
|
}
|
|
|
|
return exit_status;
|
|
}
|
|
|
|
static int rmaps_lama_check_mppr(orte_node_t *node,
|
|
hwloc_obj_t *child_obj)
|
|
{
|
|
int ret;
|
|
|
|
/*
|
|
* Optimization if no MPPR provided
|
|
*/
|
|
if( NULL == lama_mppr_levels ) {
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: No MPPR to check - Skip...");
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Check ---------------------------");
|
|
/*
|
|
* Check Parents (excluding self)
|
|
*/
|
|
if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_parents(node, &(*child_obj)->parent, true)) ) {
|
|
return ret;
|
|
}
|
|
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Check ---------------------------");
|
|
|
|
/*
|
|
* Check Children (including self)
|
|
*/
|
|
if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_children(node, child_obj, true)) ) {
|
|
return ret;
|
|
}
|
|
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Check ---------------------------");
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
static int rmaps_lama_inc_mppr(orte_node_t *node,
|
|
hwloc_obj_t *child_obj)
|
|
{
|
|
int ret;
|
|
|
|
/*
|
|
* Optimization if no MPPR provided
|
|
*/
|
|
if( NULL == lama_mppr_levels ) {
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: No MPPR to increment - Skip...");
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Inc ---------------------------");
|
|
/*
|
|
* Increment Parents (excluding self)
|
|
*/
|
|
if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_parents(node, &(*child_obj)->parent, false)) ) {
|
|
return ret;
|
|
}
|
|
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Inc ---------------------------");
|
|
|
|
/*
|
|
* Increment Children (including self)
|
|
*/
|
|
if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_children(node, child_obj, false)) ) {
|
|
return ret;
|
|
}
|
|
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: Inc ---------------------------");
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
static int rmaps_lama_iter_mppr_parents(orte_node_t *node,
|
|
hwloc_obj_t *child_obj,
|
|
bool check_only)
|
|
{
|
|
rmaps_lama_hwloc_user_t *hwloc_userdata = NULL;
|
|
rmaps_lama_node_mppr_t *mppr_accounting = NULL;
|
|
char str[128];
|
|
|
|
/*
|
|
* Basecase
|
|
*/
|
|
if( NULL == *child_obj ) {
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
/*
|
|
* Check self
|
|
*/
|
|
/*
|
|
* Access MPPR info for this object
|
|
*/
|
|
hwloc_userdata = (rmaps_lama_hwloc_user_t*)((opal_hwloc_topo_data_t*)(*child_obj)->userdata)->userdata;
|
|
mppr_accounting = (rmaps_lama_node_mppr_t*)opal_pointer_array_get_item(hwloc_userdata->node_mppr, node->index);
|
|
|
|
hwloc_obj_snprintf(str, sizeof(str), node->topology, *child_obj, "#", 0);
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: %s: P [%2d] %10s - %20s - Max %3d , Cur %3d (Oversub.: %s / %s)",
|
|
(check_only ? "Checking " : "Increment"),
|
|
node->index, node->name, str,
|
|
mppr_accounting->max,
|
|
(check_only ? mppr_accounting->cur : mppr_accounting->cur + 1),
|
|
(rmaps_lama_am_oversubscribing ? "T" : "F"),
|
|
(rmaps_lama_can_oversubscribe ? "T" : "F") );
|
|
|
|
/*
|
|
* Check limits - Error on first to exceed
|
|
*/
|
|
if( check_only ) {
|
|
if( mppr_accounting->max >= 0 && !rmaps_lama_am_oversubscribing) {
|
|
if( (mppr_accounting->cur)+1 > mppr_accounting->max ) {
|
|
return ORTE_ERROR;
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
* Increment current number allocated below this level
|
|
*/
|
|
else {
|
|
mppr_accounting->cur += 1;
|
|
}
|
|
|
|
/*
|
|
* Go to parent
|
|
*/
|
|
return rmaps_lama_iter_mppr_parents(node, &((*child_obj)->parent), check_only);
|
|
}
|
|
|
|
static int rmaps_lama_iter_mppr_children(orte_node_t *node,
|
|
hwloc_obj_t *child_obj,
|
|
bool check_only)
|
|
{
|
|
int ret;
|
|
rmaps_lama_hwloc_user_t *hwloc_userdata = NULL;
|
|
rmaps_lama_node_mppr_t *mppr_accounting = NULL;
|
|
char str[128];
|
|
int i;
|
|
|
|
/*
|
|
* Check self
|
|
*/
|
|
/*
|
|
* Access MPPR info for this object
|
|
*/
|
|
hwloc_userdata = (rmaps_lama_hwloc_user_t*)((opal_hwloc_topo_data_t*)(*child_obj)->userdata)->userdata;
|
|
mppr_accounting = (rmaps_lama_node_mppr_t*)opal_pointer_array_get_item(hwloc_userdata->node_mppr, node->index);
|
|
|
|
hwloc_obj_snprintf(str, sizeof(str), node->topology, *child_obj, "#", 0);
|
|
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
|
"mca:rmaps:lama: %s: C [%2d] %10s - %20s - Max %3d , Cur %3d (Oversub.: %s / %s)",
|
|
(check_only ? "Checking " : "Increment"),
|
|
node->index, node->name, str,
|
|
mppr_accounting->max,
|
|
(check_only ? mppr_accounting->cur : mppr_accounting->cur + 1),
|
|
(rmaps_lama_am_oversubscribing ? "T" : "F"),
|
|
(rmaps_lama_can_oversubscribe ? "T" : "F") );
|
|
|
|
/*
|
|
* Check limits - Error on first to exceed
|
|
*/
|
|
if( check_only ) {
|
|
if( mppr_accounting->max >= 0 && !rmaps_lama_am_oversubscribing) {
|
|
if( (mppr_accounting->cur)+1 > mppr_accounting->max ) {
|
|
return ORTE_ERROR;
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
* Increment current number allocated below this level
|
|
*/
|
|
else {
|
|
mppr_accounting->cur += 1;
|
|
}
|
|
|
|
/*
|
|
* Check all children
|
|
*/
|
|
for(i = 0; i < (int)(*child_obj)->arity; ++i ) {
|
|
if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_children(node, &((*child_obj)->children[i]), check_only)) ) {
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
|
|
/*
 * Build the slot list (cpu list string) a process should be bound to.
 *
 * cur_node     Node being mapped; supplies the hwloc topology.
 * pu_obj       Object the process was mapped to.  NULL yields NULL.
 * put_idx_ref  Not used by this function.
 *
 * Starting from the ancestor of pu_obj found by rmaps_lama_find_parent()
 * for lama_binding_level, lama_binding_num_levels objects are visited by
 * following next_cousin links.  For each one the MPPR is checked and the
 * intersection of its allowed and online cpusets is merged into the
 * binding set.  Only after every object passed the check are the MPPR
 * counters incremented (in a second walk), so a failed check never needs
 * a roll back.
 *
 * Returns the string produced by hwloc_bitmap_list_asprintf() (the
 * callers in this file release it with free()), or NULL when no binding
 * parent exists, an MPPR check or increment fails, or there are not
 * enough cousins on the node.
 */
static char * get_native_slot_list(orte_node_t *cur_node, hwloc_obj_t *pu_obj, int *put_idx_ref)
{
    int step;
    char *result = NULL;
    char *bind_type_name = NULL;
    hwloc_obj_t *first_bind_obj = NULL;
    hwloc_obj_t *walker = NULL;
    hwloc_cpuset_t bound_cpus;
    hwloc_cpuset_t usable_cpus;

    /*
     * Nothing to bind to
     */
    if( NULL == pu_obj ) {
        return NULL;
    }

    /*
     * Climb the tree to the object at the binding level
     */
    first_bind_obj = rmaps_lama_find_parent(cur_node->topology, pu_obj, lama_binding_level);
    if( NULL == first_bind_obj ) {
        return NULL;
    }

    bound_cpus = hwloc_bitmap_alloc();
    hwloc_bitmap_zero(bound_cpus);

    usable_cpus = hwloc_bitmap_alloc();

    /*
     * Walk 1: verify the MPPR and collect the cpus of each cousin,
     * stopping if the node runs out of cousins.
     */
    walker = first_bind_obj;
    for( step = 0; step < lama_binding_num_levels; ++step ) {
        if( ORTE_SUCCESS != rmaps_lama_check_mppr(cur_node, walker) ) {
            goto cleanup;
        }

        /*
         * Merge in the cpus that are both allowed and online.
         *
         * JJH: TODO: Add resource offline check (?)
         * JJH: Maybe use opal_hwloc_base_get_available_cpus(cur_node->topology, (*walker)) ?
         *      They do pretty much the same thing, but with more checks...
         */
        hwloc_bitmap_zero(usable_cpus);
        hwloc_bitmap_and(usable_cpus, (*walker)->allowed_cpuset, (*walker)->online_cpuset);
        hwloc_bitmap_or(bound_cpus, usable_cpus, bound_cpus);

        if( NULL == (*walker)->next_cousin ) {
            /*
             * No further cousin.  That is only a problem if more
             * objects are still required.
             */
            if( (step+1) < lama_binding_num_levels ) {
                bind_type_name = lama_type_enum_to_str(lama_binding_level);
                opal_output_verbose(10, orte_rmaps_base.rmaps_output,
                                    "mca:rmaps:lama: Error: Not able to bind to %*d x %10s - Stopped at %*d",
                                    MAX_BIND_DIGIT_LEN, lama_binding_num_levels,
                                    bind_type_name,
                                    MAX_BIND_DIGIT_LEN, step);
                free(bind_type_name);
                bind_type_name = NULL;
                goto cleanup;
            }
        } else {
            walker = &((*walker)->next_cousin);
        }
    }

    /*
     * Walk 2: every check passed, so now record the placement in the
     * MPPR counters of the same objects.
     */
    walker = first_bind_obj;
    for( step = 0; step < lama_binding_num_levels; ++step ) {
        if( ORTE_SUCCESS != rmaps_lama_inc_mppr(cur_node, walker) ) {
            goto cleanup;
        }

        if( NULL != (*walker)->next_cousin ) {
            walker = &((*walker)->next_cousin);
        }
    }

    /*
     * Turn the cpuset into the slot list string for the remote daemon
     */
    hwloc_bitmap_list_asprintf(&result, bound_cpus);

 cleanup:
    hwloc_bitmap_free(usable_cpus);
    hwloc_bitmap_free(bound_cpus);

    return result;
}
|
|
|
|
|
|
/*********************************
|
|
* Timer Support
|
|
*********************************/
|
|
/*
 * Read the current time as a double.
 *
 * When OPAL_TIMER_USEC_NATIVE is set, the value of
 * opal_timer_base_get_usec() divided by 1,000,000 is returned
 * (presumably microseconds converted to seconds, going by the name).
 * Otherwise gettimeofday() is used and the result is seconds plus the
 * fractional microsecond part.
 */
static double rmaps_lama_get_time(void)
{
#if OPAL_TIMER_USEC_NATIVE
    return (double)opal_timer_base_get_usec() / 1000000.0;
#else
    struct timeval now;

    gettimeofday(&now, NULL);
    return (double)now.tv_sec + (double)now.tv_usec / 1000000.0;
#endif
}
|
|
|
|
static void rmaps_lama_set_time(int idx, bool is_start)
|
|
{
|
|
if(idx < RMAPS_LAMA_TIMER_MAX ) {
|
|
if( is_start ) {
|
|
timer_start[idx] = rmaps_lama_get_time();
|
|
} else {
|
|
timer_end[idx] = rmaps_lama_get_time();
|
|
timer_accum[idx] += timer_end[idx] - timer_start[idx];
|
|
}
|
|
}
|
|
}
|
|
|
|
static void rmaps_lama_display_all_timers(void)
|
|
{
|
|
double diff = 0.0;
|
|
double total = 0.0;
|
|
char * label = NULL;
|
|
|
|
opal_output(0,
|
|
"mca:rmaps:lama: Timing: ---------------------------\n");
|
|
|
|
/*
|
|
* Timer: Parse Parameters
|
|
*/
|
|
label = strdup("Parse Params");
|
|
diff = timer_accum[RMAPS_LAMA_TIMER_PARSE_PARAMS];
|
|
rmaps_lama_display_indv_timer_core(diff, label);
|
|
free(label);
|
|
total += diff;
|
|
|
|
/*
|
|
* Timer: Build Max Tree
|
|
*/
|
|
label = strdup("Build Max Tree");
|
|
diff = timer_accum[RMAPS_LAMA_TIMER_BUILD_MAX_TREE];
|
|
rmaps_lama_display_indv_timer_core(diff, label);
|
|
free(label);
|
|
total += diff;
|
|
|
|
/*
|
|
* Timer: Mapping
|
|
*/
|
|
label = strdup("Mapping");
|
|
diff = timer_accum[RMAPS_LAMA_TIMER_MAPPING];
|
|
rmaps_lama_display_indv_timer_core(diff, label);
|
|
free(label);
|
|
total += diff;
|
|
|
|
/*
|
|
* Timer: Ordering
|
|
*/
|
|
label = strdup("Ordering");
|
|
diff = timer_accum[RMAPS_LAMA_TIMER_ORDERING];
|
|
rmaps_lama_display_indv_timer_core(diff, label);
|
|
free(label);
|
|
total += diff;
|
|
|
|
/*
|
|
* Timer: Total Overhead
|
|
*/
|
|
label = strdup("Other Overhead");
|
|
diff = timer_accum[RMAPS_LAMA_TIMER_TOTAL];
|
|
rmaps_lama_display_indv_timer_core(diff - total, label);
|
|
free(label);
|
|
|
|
/*
|
|
* Timer: Total
|
|
*/
|
|
label = strdup("Total");
|
|
diff = timer_accum[RMAPS_LAMA_TIMER_TOTAL];
|
|
rmaps_lama_display_indv_timer_core(diff, label);
|
|
free(label);
|
|
|
|
opal_output(0,
|
|
"mca:rmaps:lama: ---------------------------------");
|
|
}
|
|
|
|
static void rmaps_lama_clear_timers(void)
|
|
{
|
|
int i;
|
|
for(i = 0; i < RMAPS_LAMA_TIMER_MAX; ++i) {
|
|
timer_start[i] = 0.0;
|
|
timer_end[i] = 0.0;
|
|
timer_accum[i] = 0.0;
|
|
}
|
|
}
|
|
|
|
|
|
static void rmaps_lama_display_indv_timer_core(double diff, char *str)
|
|
{
|
|
double perc = 0;
|
|
double total = 0;
|
|
|
|
total = timer_end[RMAPS_LAMA_TIMER_TOTAL] - timer_start[RMAPS_LAMA_TIMER_TOTAL];
|
|
perc = (diff/total) * 100;
|
|
|
|
opal_output(0,
|
|
"mca:rmaps:lama: \t%-20s = %10.2f ms\t%6.2f %s\n",
|
|
str, (diff * 1000), perc, "%");
|
|
return;
|
|
}
|