1
1
Ralph Castain b44f8d4b28 Complete implementation of the ess.proc_get_locality API. Up to this point, the API was only capable of telling if the specified proc was sharing a node with you. However, the returned value was capable of telling you much more detailed info - e.g., if the proc shares a socket, a cache, or numa node. We just didn't have the data to provide that detail.
Use hwloc to obtain the cpuset for each process during mpi_init, and share that info in the modex. As it arrives, use a new opal_hwloc_base utility function to parse the value against the local proc's cpuset and determine where they overlap. Cache the value in the pmap object as it may be referenced multiple times.

Thus, the return value from orte_ess.proc_get_locality is a 16-bit bitmask that describes the resources being shared with you. This bitmask can be tested using the macros in opal/mca/paffinity/paffinity.h

Locality is available for all procs, whether launched via mpirun or directly with an external launcher such as slurm or aprun.

This commit was SVN r25331.
2011-10-19 20:18:14 +00:00

193 строки
6.4 KiB
C

/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2010 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file
 *
 * The OpenRTE Environment-Specific Services (ESS)
 *
 */
#ifndef ORTE_ESS_H
#define ORTE_ESS_H
#include "orte_config.h"
#include "orte/types.h"
#include "opal/mca/mca.h"
#include "opal/mca/paffinity/paffinity.h"
#include "orte/util/proc_info.h"
BEGIN_C_DECLS
/*
* API functions
*/
/**
 * Initialize the RTE for this environment
 *
 * Takes no arguments.
 *
 * @return int status code. NOTE(review): presumably ORTE_SUCCESS on
 *         success and an ORTE error code otherwise - confirm against
 *         the component implementations.
 */
typedef int (*orte_ess_base_module_init_fn_t)(void);
/**
 * Finalize the RTE for this environment
 *
 * Takes no arguments.
 *
 * @return int status code. NOTE(review): presumably ORTE_SUCCESS on
 *         success and an ORTE error code otherwise - confirm against
 *         the component implementations.
 */
typedef int (*orte_ess_base_module_finalize_fn_t)(void);
/**
 * Abort the current application
 *
 * Aborts the currently running application. NOTE: We do NOT call the
 * regular C-library "abort" function, even though that would have
 * alerted us to the fact that this is an abnormal termination, because
 * it would automatically cause a core file to be generated.
 *
 * @param[in] status  Status value supplied by the caller.
 *                    NOTE(review): presumably used as the process exit
 *                    status - confirm against the implementations.
 * @param[in] report  Indicates if the function should create an
 *                    appropriate file to alert the local orted that
 *                    termination was abnormal.
 *
 * The __opal_attribute_noreturn_funcptr__ annotation is defined outside
 * this file. NOTE(review): judging by its name it marks the pointed-to
 * function as never returning - confirm against the OPAL attribute
 * definitions.
 */
typedef void (*orte_ess_base_module_abort_fn_t)(int status, bool report)
__opal_attribute_noreturn_funcptr__;
/**
 * Get the locality flag of the specified process
 *
 * MPI procs need to know whether a proc shares a common socket,
 * board, node, computing unit, or cluster. This function provides
 * a means for an MPI proc to query the locality of another proc.
 *
 * @param[in] proc  Name of the process being queried.
 *
 * @return The locality of @p proc as an opal_paffinity_locality_t.
 *         NOTE(review): presumably a bitmask meant to be tested with
 *         the macros in opal/mca/paffinity/paffinity.h - confirm.
 */
typedef opal_paffinity_locality_t (*orte_ess_base_module_get_proc_locality_fn_t)(orte_process_name_t *proc);
/**
 * Get the vpid of the daemon that hosts the specified proc
 *
 * In order to route messages to the correct place, the RML
 * and routed modules need to know the vpid of the daemon
 * that hosts the intended recipient. This API accesses
 * the pidmap/nidmap to retrieve that info.
 *
 * @param[in] proc  Name of the process whose hosting daemon is wanted.
 *
 * @return The vpid of the hosting daemon.
 */
typedef orte_vpid_t (*orte_ess_base_module_proc_get_daemon_fn_t)(orte_process_name_t *proc);
/**
 * Get the hostname where a proc resides
 *
 * MPI procs need to know the hostname where a specified proc resides.
 * Different environments provide that info in different ways - e.g., they
 * may provide a callable utility to return the answer, or download
 * a map of information into each process. This API provides a
 * means for each environment to do the "right thing".
 *
 * @param[in] proc  Name of the process whose hostname is wanted.
 *
 * @return Pointer to the hostname string.
 *
 * NOTE: To avoid memory waste, this function returns a pointer
 * to static storage. IT MUST NOT BE FREED!
 */
typedef char* (*orte_ess_base_module_proc_get_hostname_fn_t)(orte_process_name_t *proc);
/**
 * Get the local rank of a remote process
 *
 * @param[in] proc  Name of the process being queried.
 *
 * @return The local rank of @p proc as an orte_local_rank_t.
 */
typedef orte_local_rank_t (*orte_ess_base_module_proc_get_local_rank_fn_t)(orte_process_name_t *proc);
/**
 * Get the node rank of a remote process
 *
 * @param[in] proc  Name of the process being queried.
 *
 * @return The node rank of @p proc as an orte_node_rank_t.
 */
typedef orte_node_rank_t (*orte_ess_base_module_proc_get_node_rank_fn_t)(orte_process_name_t *proc);
/**
 * Get the epoch of the specified process
 *
 * The epochs of the processes are stored in the process_name struct, but
 * this will get the most up-to-date version stored within the orte_proc_t
 * struct. The epoch carried in the proc name that is passed in is ignored.
 *
 * The return type depends on the build configuration: orte_epoch_t when
 * ORTE_ENABLE_EPOCH is non-zero, plain int otherwise.
 *
 * @param[in] proc  Name of the process whose epoch is wanted.
 *
 * @return The epoch of @p proc.
 */
#if ORTE_ENABLE_EPOCH
typedef orte_epoch_t (*orte_ess_base_module_proc_get_epoch_fn_t)(orte_process_name_t *proc);
#else
typedef int (*orte_ess_base_module_proc_get_epoch_fn_t)(orte_process_name_t *proc);
#endif
/**
 * Update the pidmap
 *
 * When a job is dynamically launched via comm_spawn, the pre-existing
 * daemons need to update their knowledge of the process map within the
 * job so they can properly do things like route messages. This API allows
 * daemons - and anyone else who wants to - to add a pidmap for a new job.
 *
 * @param[in] bo  Byte object carrying the pidmap.
 *                NOTE(review): presumably a packed/encoded pidmap that the
 *                implementation decodes - confirm the expected format.
 *
 * @return int status code. NOTE(review): presumably ORTE_SUCCESS or an
 *         ORTE error code - confirm.
 */
typedef int (*orte_ess_base_module_update_pidmap_fn_t)(opal_byte_object_t *bo);
/**
 * Update a nidmap
 *
 * When a job is dynamically launched via comm_spawn, the pre-existing
 * daemons need to update their knowledge of the node map that contains
 * info on what daemon resides on which nodes.
 *
 * @param[in] bo  Byte object carrying the nidmap.
 *                NOTE(review): presumably a packed/encoded nidmap that the
 *                implementation decodes - confirm the expected format.
 *
 * @return int status code. NOTE(review): presumably ORTE_SUCCESS or an
 *         ORTE error code - confirm.
 */
typedef int (*orte_ess_base_module_update_nidmap_fn_t)(opal_byte_object_t *bo);
/**
 * Handle fault tolerance updates
 *
 * @param[in] state Fault tolerance state update
 *
 * @retval ORTE_SUCCESS The operation completed successfully
 * @retval ORTE_ERROR   An unspecified error occurred
 */
typedef int (*orte_ess_base_module_ft_event_fn_t)(int state);
/*
 * The standard ESS module data structure: one function-pointer slot per
 * API typedef declared in this header. The unversioned name
 * orte_ess_base_module_t is an alias for the 1.0.0 layout.
 */
typedef struct orte_ess_base_module_1_0_0_t orte_ess_base_module_1_0_0_t;
typedef orte_ess_base_module_1_0_0_t orte_ess_base_module_t;

struct orte_ess_base_module_1_0_0_t {
    /* RTE lifecycle */
    orte_ess_base_module_init_fn_t                  init;
    orte_ess_base_module_finalize_fn_t              finalize;
    orte_ess_base_module_abort_fn_t                 abort;

    /* per-process queries, each keyed by process name */
    orte_ess_base_module_get_proc_locality_fn_t     proc_get_locality;
    orte_ess_base_module_proc_get_daemon_fn_t       proc_get_daemon;
    orte_ess_base_module_proc_get_hostname_fn_t     proc_get_hostname;
    orte_ess_base_module_proc_get_local_rank_fn_t   get_local_rank;
    orte_ess_base_module_proc_get_node_rank_fn_t    get_node_rank;
    orte_ess_base_module_proc_get_epoch_fn_t        proc_get_epoch;

    /* map maintenance */
    orte_ess_base_module_update_pidmap_fn_t         update_pidmap;
    orte_ess_base_module_update_nidmap_fn_t         update_nidmap;

    /* fault tolerance notifications */
    orte_ess_base_module_ft_event_fn_t              ft_event;
};
/*
 * The standard ESS component data structure. The unversioned name
 * orte_ess_base_component_t is an alias for the 2.0.0 layout.
 *
 * NOTE(review): both members are MCA base types declared outside this
 * file (presumably in opal/mca/mca.h) - consult that header for their
 * contents.
 */
typedef struct orte_ess_base_component_2_0_0_t orte_ess_base_component_2_0_0_t;
typedef orte_ess_base_component_2_0_0_t orte_ess_base_component_t;

struct orte_ess_base_component_2_0_0_t {
    mca_base_component_t        base_version;
    mca_base_component_data_t   base_data;
};
/*
 * Version initializer for components that are of type ess.
 *
 * Expands to a comma-separated list: whatever MCA_BASE_VERSION_2_0_0
 * expands to, followed by the type name "ess" and the version
 * triple 2, 0, 0.
 */
#define ORTE_ESS_BASE_VERSION_2_0_0     \
    MCA_BASE_VERSION_2_0_0,             \
    "ess",                              \
    2, 0, 0
/*
 * Global structure for accessing ESS functions.
 *
 * This is a declaration only (extern); the object itself is defined
 * outside this header.
 */
ORTE_DECLSPEC extern orte_ess_base_module_t orte_ess; /* holds selected module's function pointers */
END_C_DECLS
#endif