2008-02-28 01:57:57 +00:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2011-06-23 20:38:02 +00:00
|
|
|
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
2008-02-28 01:57:57 +00:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "orte_config.h"
|
|
|
|
#include "orte/constants.h"
|
|
|
|
|
2009-11-09 14:26:24 +00:00
|
|
|
#if defined(HAVE_CNOS_MPI_OS_H)
|
|
|
|
# include "cnos_mpi_os.h"
|
|
|
|
#elif defined(HAVE_CATAMOUNT_CNOS_MPI_OS_H)
|
|
|
|
# include "catamount/cnos_mpi_os.h"
|
|
|
|
#endif
|
2008-02-28 01:57:57 +00:00
|
|
|
|
Per the RFC, extend the current use of the ompi_proc_t flags field (without changing the field itself).
The prior ompi_proc_t structure had a uint8_t flag field in it, where only one
bit was used to flag that a proc was "local". In that context, "local" was
constrained to mean "local to this node".
This commit provides a greater degree of granularity on the term "local", to include tests
to see if the proc is on the same socket, PC board, node, switch, CU (computing
unit), and cluster.
Add #define's to designate which bits stand for which local condition. This
was added to the OPAL layer to avoid conflicting with the proposed movement of
the BTLs. To make it easier to use, a set of macros has been defined - e.g.,
OPAL_PROC_ON_LOCAL_SOCKET - that test the specific bit. These can be used in
the code base to clearly indicate which sense of locality is being considered.
All locations in the code base that looked at the current proc_t field have
been changed to use the new macros.
Also modify the orte_ess modules so that each returns a uint8_t (to match the
ompi_proc_t field) that contains a complete description of the locality of this
proc. Obviously, not all environments will be capable of providing such detailed
info. Thus, getting a "false" from a test for "on_local_socket" may simply
indicate a lack of knowledge.
This commit was SVN r20496.
2009-02-10 02:20:16 +00:00
|
|
|
#include "opal/mca/paffinity/paffinity.h"
|
2009-02-14 02:26:12 +00:00
|
|
|
#include "opal/util/output.h"
|
2010-03-23 20:47:41 +00:00
|
|
|
#include "opal/class/opal_list.h"
|
2008-02-28 01:57:57 +00:00
|
|
|
|
|
|
|
#include "orte/mca/errmgr/base/base.h"
|
|
|
|
#include "orte/util/proc_info.h"
|
|
|
|
#include "orte/runtime/orte_globals.h"
|
|
|
|
#include "orte/mca/grpcomm/base/base.h"
|
2008-08-31 18:06:55 +00:00
|
|
|
#include "orte/runtime/runtime_internals.h"
|
2008-02-28 01:57:57 +00:00
|
|
|
|
|
|
|
#include "orte/mca/ess/ess.h"
|
|
|
|
#include "orte/mca/ess/base/base.h"
|
|
|
|
#include "orte/mca/ess/cnos/ess_cnos.h"
|
|
|
|
|
2009-05-04 11:07:40 +00:00
|
|
|
static int rte_init(void);
|
2008-02-28 01:57:57 +00:00
|
|
|
static int rte_finalize(void);
|
|
|
|
static void rte_abort(int status, bool report) __opal_attribute_noreturn__;
|
Per the RFC, extend the current use of the ompi_proc_t flags field (without changing the field itself).
The prior ompi_proc_t structure had a uint8_t flag field in it, where only one
bit was used to flag that a proc was "local". In that context, "local" was
constrained to mean "local to this node".
This commit provides a greater degree of granularity on the term "local", to include tests
to see if the proc is on the same socket, PC board, node, switch, CU (computing
unit), and cluster.
Add #define's to designate which bits stand for which local condition. This
was added to the OPAL layer to avoid conflicting with the proposed movement of
the BTLs. To make it easier to use, a set of macros have been defined - e.g.,
OPAL_PROC_ON_LOCAL_SOCKET - that test the specific bit. These can be used in
the code base to clearly indicate which sense of locality is being considered.
All locations in the code base that looked at the current proc_t field have
been changed to use the new macros.
Also modify the orte_ess modules so that each returns a uint8_t (to match the
ompi_proc_t field) that contains a complete description of the locality of this
proc. Obviously, not all environments will be capable of providing such detailed
info. Thus, getting a "false" from a test for "on_local_socket" may simply
indicate a lack of knowledge.
This commit was SVN r20496.
2009-02-10 02:20:16 +00:00
|
|
|
static uint8_t proc_get_locality(orte_process_name_t *proc);
|
2008-04-30 19:49:53 +00:00
|
|
|
static char* proc_get_hostname(orte_process_name_t *proc);
|
2008-09-25 13:39:08 +00:00
|
|
|
static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc);
|
|
|
|
static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc);
|
2008-02-28 01:57:57 +00:00
|
|
|
|
|
|
|
orte_ess_base_module_t orte_ess_cnos_module = {
|
|
|
|
rte_init,
|
|
|
|
rte_finalize,
|
2008-03-05 04:57:23 +00:00
|
|
|
rte_abort,
|
Per the RFC, extend the current use of the ompi_proc_t flags field (without changing the field itself).
The prior ompi_proc_t structure had a uint8_t flag field in it, where only one
bit was used to flag that a proc was "local". In that context, "local" was
constrained to mean "local to this node".
This commit provides a greater degree of granularity on the term "local", to include tests
to see if the proc is on the same socket, PC board, node, switch, CU (computing
unit), and cluster.
Add #define's to designate which bits stand for which local condition. This
was added to the OPAL layer to avoid conflicting with the proposed movement of
the BTLs. To make it easier to use, a set of macros have been defined - e.g.,
OPAL_PROC_ON_LOCAL_SOCKET - that test the specific bit. These can be used in
the code base to clearly indicate which sense of locality is being considered.
All locations in the code base that looked at the current proc_t field have
been changed to use the new macros.
Also modify the orte_ess modules so that each returns a uint8_t (to match the
ompi_proc_t field) that contains a complete description of the locality of this
proc. Obviously, not all environments will be capable of providing such detailed
info. Thus, getting a "false" from a test for "on_local_socket" may simply
indicate a lack of knowledge.
This commit was SVN r20496.
2009-02-10 02:20:16 +00:00
|
|
|
proc_get_locality,
|
2008-10-31 21:10:00 +00:00
|
|
|
NULL, /* proc_get_daemon is only used in ORTE */
|
2008-04-30 19:49:53 +00:00
|
|
|
proc_get_hostname,
|
|
|
|
proc_get_local_rank,
|
|
|
|
proc_get_node_rank,
|
2011-06-23 20:38:02 +00:00
|
|
|
orte_ess_base_proc_get_epoch, /* get_epoch */
|
2008-10-31 21:10:00 +00:00
|
|
|
NULL, /* add_pidmap is only used in ORTE */
|
|
|
|
NULL, /* update_nidmap is only used in ORTE */
|
2008-03-05 04:57:23 +00:00
|
|
|
NULL /* ft_event */
|
2008-02-28 01:57:57 +00:00
|
|
|
};
|
|
|
|
|
2008-04-30 19:49:53 +00:00
|
|
|
/*
 * NID/PID map handed back by cnos_get_nidpid_map() in rte_init().
 * proc_get_locality() and proc_get_hostname() index it by vpid and
 * read the .nid member of the selected entry.
 */
static cnos_nidpid_map_t *map;
|
|
|
|
|
2009-05-04 11:07:40 +00:00
|
|
|
static int rte_init(void)
|
2008-02-28 01:57:57 +00:00
|
|
|
{
|
|
|
|
int rc;
|
2008-03-20 16:55:57 +00:00
|
|
|
int nprocs;
|
2008-08-31 18:06:55 +00:00
|
|
|
|
|
|
|
orte_dt_init();
|
2008-02-28 01:57:57 +00:00
|
|
|
|
|
|
|
/* Get our process information */
|
|
|
|
|
|
|
|
/* Procs in this environment are directly launched. Hence, there
|
|
|
|
* was no mpirun to create a jobid for us, and each app proc is
|
|
|
|
* going to have to fend for itself. For now, we assume that the
|
|
|
|
* jobid is some arbitrary number (say, 1).
|
|
|
|
*/
|
|
|
|
ORTE_PROC_MY_NAME->jobid = 1;
|
|
|
|
|
|
|
|
/* find our vpid from cnos */
|
|
|
|
ORTE_PROC_MY_NAME->vpid = (orte_vpid_t) cnos_get_rank();
|
|
|
|
|
|
|
|
/* Get the number of procs in the job from cnos */
|
2009-03-05 21:56:03 +00:00
|
|
|
orte_process_info.num_procs = (orte_std_cntr_t) cnos_get_size();
|
2011-06-23 20:38:02 +00:00
|
|
|
|
|
|
|
if (orte_process_info.max_procs < orte_process_info.num_procs) {
|
|
|
|
orte_process_info.max_procs = orte_process_info.num_procs;
|
|
|
|
}
|
2008-02-28 01:57:57 +00:00
|
|
|
|
2008-04-30 19:49:53 +00:00
|
|
|
/* Get the nid map */
|
2008-03-20 16:55:57 +00:00
|
|
|
nprocs = cnos_get_nidpid_map(&map);
|
|
|
|
if (nprocs <= 0) {
|
2008-06-09 14:53:58 +00:00
|
|
|
opal_output(0, "%5d: cnos_get_nidpid_map() returned %d",
|
2008-03-20 16:55:57 +00:00
|
|
|
cnos_get_rank(), nprocs);
|
|
|
|
return ORTE_ERR_FATAL;
|
|
|
|
}
|
|
|
|
|
2008-02-28 01:57:57 +00:00
|
|
|
/* MPI_Init needs the grpcomm framework, so we have to init it */
|
|
|
|
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_open())) {
|
|
|
|
ORTE_ERROR_LOG(rc);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_select())) {
|
|
|
|
ORTE_ERROR_LOG(rc);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* that's all we need here */
|
|
|
|
return ORTE_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int rte_finalize(void)
|
|
|
|
{
|
|
|
|
/* just cleanup the things we used */
|
|
|
|
orte_grpcomm_base_close();
|
|
|
|
|
|
|
|
/* clean out the global structures */
|
|
|
|
orte_proc_info_finalize();
|
|
|
|
|
|
|
|
return ORTE_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Terminate the process immediately with the given exit status.
 *
 * status  value handed to exit()
 * report  not consulted by this implementation
 *
 * Does not return.
 */
static void rte_abort(int status, bool report)
{
    (void) report;   /* intentionally unused */

    exit(status);
}
|
2008-04-30 19:49:53 +00:00
|
|
|
|
Per the RFC, extend the current use of the ompi_proc_t flags field (without changing the field itself).
The prior ompi_proc_t structure had a uint8_t flag field in it, where only one
bit was used to flag that a proc was "local". In that context, "local" was
constrained to mean "local to this node".
This commit provides a greater degree of granularity on the term "local", to include tests
to see if the proc is on the same socket, PC board, node, switch, CU (computing
unit), and cluster.
Add #define's to designate which bits stand for which local condition. This
was added to the OPAL layer to avoid conflicting with the proposed movement of
the BTLs. To make it easier to use, a set of macros has been defined - e.g.,
OPAL_PROC_ON_LOCAL_SOCKET - that test the specific bit. These can be used in
the code base to clearly indicate which sense of locality is being considered.
All locations in the code base that looked at the current proc_t field have
been changed to use the new macros.
Also modify the orte_ess modules so that each returns a uint8_t (to match the
ompi_proc_t field) that contains a complete description of the locality of this
proc. Obviously, not all environments will be capable of providing such detailed
info. Thus, getting a "false" from a test for "on_local_socket" may simply
indicate a lack of knowledge.
This commit was SVN r20496.
2009-02-10 02:20:16 +00:00
|
|
|
/*
 * Report how "local" the given process is relative to this one.
 *
 * The decision is based solely on the nid recorded in the nid/pid map
 * for each vpid: equal nids yield the node, CU and cluster locality
 * bits, anything else yields OPAL_PROC_NON_LOCAL.
 *
 * Neither vpid is range-checked against the map here.
 */
static uint8_t proc_get_locality(orte_process_name_t *proc)
{
    /* different nid: nothing is known to be shared with that proc */
    if (map[ORTE_PROC_MY_NAME->vpid].nid != map[proc->vpid].nid) {
        return OPAL_PROC_NON_LOCAL;
    }

    /* same nid: same node, and therefore same CU and same cluster */
    return (OPAL_PROC_ON_NODE | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER);
}
|
|
|
|
|
|
|
|
/*
 * Build a host name of the form "n<nid>" for the given process, using
 * the nid stored in the nid/pid map entry for its vpid.
 *
 * The returned pointer refers to a function-local static buffer: it is
 * overwritten by the next call and must not be freed by the caller.
 */
static char *proc_get_hostname(orte_process_name_t *proc)
{
    static char name_buf[128];

    snprintf(name_buf, sizeof(name_buf), "n%d", map[proc->vpid].nid);

    return name_buf;
}
|
|
|
|
|
2008-09-25 13:39:08 +00:00
|
|
|
/*
 * Local rank lookup -- placeholder implementation.
 *
 * RHC: someone more familiar with CNOS needs to fix this to return the
 * correct value.  Until then every process is reported as local rank 0
 * and the argument is ignored.
 */
static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc)
{
    (void) proc;   /* not consulted yet */

    return 0;
}
|
|
|
|
|
2008-09-25 13:39:08 +00:00
|
|
|
/*
 * Node rank lookup -- placeholder implementation.
 *
 * RHC: someone more familiar with CNOS needs to fix this to return the
 * correct value.  Until then every process is reported as node rank 0
 * and the argument is ignored.
 */
static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc)
{
    (void) proc;   /* not consulted yet */

    return 0;
}
|
2010-03-23 20:47:41 +00:00
|
|
|
|