1
1
openmpi/orte/mca/plm/alps/plm_alps.h
Howard Pritchard 39367ca0bf plm/alps: only use srun for Native SLURM
Turns out that the way the SLURM plm works
is not compatible with the way MPI processes
on Cray XC obtain RDMA credentials to use
the high speed network.  Unlike with ALPS,
the mpirun process is on the first compute
node in the job.  With the current PLM launch
system, mpirun (HNP daemon) launches the MPI
ranks on that node rather than relying on
srun.

This will probably require a significant amount
of effort to rework to support Native SLURM
on Cray XC systems.  As a short-term alternative,
have the alps plm (which is again selected by default
on Cray systems regardless of the launch system)
check whether srun or alps is being used on the
system.  If alps is not being used, print a helpful
message for the user and abort the job launch.

Signed-off-by: Howard Pritchard <howardp@lanl.gov>
2015-12-22 11:03:42 -08:00

54 строки
1.4 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef ORTE_PLM_ALPS_EXPORT_H
#define ORTE_PLM_ALPS_EXPORT_H
#include "orte_config.h"
#include "orte/mca/mca.h"
#include "orte/mca/plm/plm.h"
#if CRAY_WLM_DETECT
#include "wlm_detect.h"
#endif
BEGIN_C_DECLS
/**
 * Component structure for the ALPS PLM.
 *
 * Wraps the base PLM component (first member) together with the
 * settings stored next to it. How and where these settings are
 * populated is not visible from this header.
 */
typedef struct orte_plm_alps_component_t {
    /* Base PLM component; deliberately the first member. */
    orte_plm_base_component_t super;

    /* Component priority -- presumably consulted during PLM
     * selection; confirm against the component source. */
    int priority;

    /* Debug flag for this component (exact effect not shown here). */
    bool debug;

    /* aprun command string -- presumably the launcher executable to
     * invoke; confirm against the component source. */
    char *aprun_cmd;

    /* Custom argument string -- presumably extra arguments handed to
     * aprun; confirm against the component source. */
    char *custom_args;
} orte_plm_alps_component_t;
/*
 * Symbols shared by the ALPS PLM sources. All three are declarations
 * only; the definitions live in other translation units.
 */

/* The ALPS PLM component instance. */
ORTE_MODULE_DECLSPEC extern orte_plm_alps_component_t mca_plm_alps_component;

/* The PLM module object provided by the ALPS component. */
ORTE_DECLSPEC extern orte_plm_base_module_t orte_plm_alps_module;

/* Flag that, judging by its name, records whether aprun (ALPS) is the
 * launcher in use -- confirm where it is set in the component source.
 * Declared without an export/visibility macro. */
extern bool mca_plm_alps_using_aprun;
END_C_DECLS
#endif /* ORTE_PLM_ALPS_EXPORT_H */