1
1

Add 'generic' support for PMI2 (previously, we checked for PMI2 only on Cray systems). If your resource manager (e.g. SLURM) has support for PMI2, then the --with-pmi configure flag will enable its use. If you don't have PMI2, then you will fall back to regular old PMI1. This patch was submitted by Ralph Castain and reviewed and pushed by Josh Ladd. This should be added to cmr:v1.7:reviewer=jladd

This commit was SVN r28666.
Этот коммит содержится в:
Joshua Ladd 2013-06-21 15:28:14 +00:00
родитель 299d5b3dd7
Коммит 0b5c1f2ea8
9 изменённых файлов: 73 добавлений и 111 удалений

Просмотреть файл

@ -27,13 +27,11 @@ AC_DEFUN([OPAL_CHECK_PMI],[
[AC_HELP_STRING([--with-pmi],
[Build PMI support (default: no)])],
[], with_pmi=no)
AC_ARG_WITH([cray-pmi2-ext],
[AC_HELP_STRING([--with-cray-pmi-ext],
[Include Cray PMI2 extensions (default: no)])],
[], with_cray_pmi2_ext=no)
opal_enable_pmi=0
opal_use_cray_pmi2_ext=0
opal_use_pmi2=0
opal_pmi_rpath=
opal_have_slurm_pmi2=0
# save flags
opal_check_pmi_$1_save_CPPFLAGS="$CPPFLAGS"
@ -48,42 +46,63 @@ AC_DEFUN([OPAL_CHECK_PMI],[
AC_MSG_CHECKING([if user requested PMI support])
AS_IF([test "$with_pmi" = "no"],
[AC_MSG_RESULT([no])
opal_use_cray_pmi2_ext=0
$3],
[AC_MSG_RESULT([yes])
AC_MSG_CHECKING([if PMI support installed])
AC_MSG_CHECKING([if PMI or PMI2 support installed])
# cannot use OMPI_CHECK_PACKAGE as its backend header
# support appends "include" to the path, which won't
# work with slurm :-(
AS_IF([test ! -z "$with_pmi" -a "$with_pmi" != "yes"],
[AS_IF([test -d "$with_pmi/lib64"],
[opal_check_pmi_$1_LDFLAGS="-L$with_pmi/lib64"
opal_check_pmi_$1_LIBS="-lpmi -Wl,-rpath=$with_pmi/lib64"],
opal_pmi_rpath="$with_pmi/lib64"],
[opal_check_pmi_$1_LDFLAGS="-L$with_pmi/lib"
opal_check_pmi_$1_LIBS="-lpmi -Wl,-rpath=$with_pmi/lib"])
AS_IF([test -f "$with_pmi/include/pmi.h"],
[opal_check_pmi_$1_CPPFLAGS="-I$with_pmi/include"],
[AS_IF([test -f "$with_pmi/include/slurm/pmi.h"],
[opal_check_pmi_$1_CPPFLAGS="-I$with_pmi/include/slurm"],
opal_pmi_rpath="$with_pmi/lib"])
# default to using PMI-2 if it is present
AS_IF([test -f "$with_pmi/include/pmi2.h" -o -f "$with_pmi/include/pmi.h"],
[opal_check_pmi_$1_CPPFLAGS="-I$with_pmi/include"
AS_IF([test -f "$with_pmi/include/pmi2.h"],
[opal_use_pmi2=1
AC_MSG_RESULT([PMI2 support found])],
[opal_use_pmi2=0
AC_MSG_RESULT([PMI support found])])],
[AS_IF([test -f "$with_pmi/include/slurm/pmi2.h" -o -f "$with_pmi/include/slurm/pmi.h"],
[opal_check_pmi_$1_CPPFLAGS="-I$with_pmi/include/slurm"
AS_IF([test -f "$with_pmi/include/slurm/pmi2.h"],
[opal_use_pmi2=1
opal_have_slurm_pmi2=1
AC_MSG_RESULT([Slurm PMI2 support found])],
[opal_use_pmi2=0
AC_MSG_RESULT([Slurm PMI support found])])],
[AC_MSG_RESULT([not found])
AC_MSG_WARN([PMI support requested (via --with-pmi) but pmi.h])
AC_MSG_WARN([not found under locations:])
AC_MSG_WARN([PMI support requested (via --with-pmi) but neither pmi.h])
AC_MSG_WARN([nor pmi2.h were found under locations:])
AC_MSG_WARN([ $with_pmi/include])
AC_MSG_WARN([ $with_pmi/include/slurm])
AC_MSG_WARN([Specified path: $with_pmi])
AC_MSG_ERROR([Aborting])
$3])])],
[AS_IF([test -f "/usr/include/slurm/pmi.h"],
[opal_check_pmi_$1_CPPFLAGS="-I/usr/include/slurm"])])
$3])])])
AS_IF([test $opal_use_pmi2 = 1],
[AS_IF([test $opal_have_slurm_pmi2 = 1],
[ # slurm puts pmi2 into a separate lib
opal_check_pmi_$1_LIBS="-lpmi2 -lpmi -Wl,-rpath=$opal_pmi_rpath"],
[opal_check_pmi_$1_LIBS="-lpmi -Wl,-rpath=$opal_pmi_rpath"])],
[opal_check_pmi_$1_LIBS="-lpmi -Wl,-rpath=$opal_pmi_rpath"])
LDFLAGS="$LDFLAGS $opal_check_pmi_$1_LDFLAGS"
CPPFLAGS="$CPPFLAGS $opal_check_pmi_$1_CPPFLAGS"
LIBS="$LIBS $opal_check_pmi_$1_LIBS"
opal_have_pmi_support=no
AC_CHECK_HEADERS([pmi.h],
[AC_CHECK_LIB([pmi], [PMI_Init],
[opal_have_pmi_support=yes])])
AS_IF([test "$opal_use_pmi2" = "1"],
[AC_CHECK_HEADERS([pmi2.h],
[AC_CHECK_LIB([pmi2], [PMI2_Init],
[opal_have_pmi_support=yes])])],
[AC_CHECK_HEADERS([pmi.h],
[AC_CHECK_LIB([pmi], [PMI_Init],
[opal_have_pmi_support=yes])])])
AC_MSG_CHECKING([PMI2 or PMI support enabled])
AS_IF([test "$opal_have_pmi_support" = "yes"],
[AC_MSG_RESULT([yes])
opal_enable_pmi=1
@ -95,24 +114,7 @@ AC_DEFUN([OPAL_CHECK_PMI],[
AC_MSG_WARN([PMI support requested (via --with-pmi) but not found.])
AC_MSG_ERROR([Aborting.])
$3])
AC_MSG_CHECKING([if user requested Cray PMI2 extensions])
AS_IF([test "$with_cray_pmi2_ext" = "no"],
[AC_MSG_RESULT([no])
opal_use_pmi2_ext=0],
[AC_MSG_RESULT([yes])
# check to see if pmi2.h header is present. if it is, then we
# will use some of the functions in it.
AC_MSG_CHECKING([if PMI2 extensions installed])
AS_IF([test -f "$with_pmi/include/pmi2.h"],
[opal_use_pmi2_ext=1
AC_MSG_RESULT(yes)],
[AC_MSG_RESULT([no])
AC_MSG_WARN([PMI2 extensions requested (via --with-cray-pmi2-ext) but not found.])
AC_MSG_ERROR([Aborting.])
opal_use_pmi2_ext=0
opal_enable_pmi=0
$3])])])
])
# restore flags - have to add CPPFLAGS so base functions can find pmi.h
CPPFLAGS="$opal_check_pmi_$1_save_CPPFLAGS $opal_check_pmi_$1_CPPFLAGS"
@ -122,8 +124,8 @@ AC_DEFUN([OPAL_CHECK_PMI],[
AC_DEFINE_UNQUOTED([WANT_PMI_SUPPORT],
[$opal_enable_pmi],
[Whether we want PMI support])
AC_DEFINE_UNQUOTED([WANT_CRAY_PMI2_EXT],
[$opal_use_pmi2_ext],
[Whether we want to use Cray PMI2 extensions])
AC_DEFINE_UNQUOTED([WANT_PMI2_SUPPORT],
[$opal_use_pmi2],
[Whether we want to use PMI2])
AM_CONDITIONAL(WANT_PMI_SUPPORT, [test "$opal_enable_pmi" = 1])
])

Просмотреть файл

@ -9,7 +9,6 @@ with_xpmem=/opt/cray/xpmem/0.1-2.0400.30792.5.6.gem
# enable Cray PMI support
with_pmi=/opt/cray/pmi/2.1.4-1.0000.8596.8.9.gem
with_cray_pmi2_ext=yes
# enable ugni btl
with_ugni=/opt/cray/ugni/2.3-1.0400.4127.5.20.gem

Просмотреть файл

@ -13,7 +13,7 @@
#include "ompi/constants.h"
#include <pmi.h>
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
#include <pmi2.h>
#endif
@ -37,7 +37,7 @@ static int publish ( char *service_name, ompi_info_t *info, char *port_name )
{
int rc;
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
if (PMI_SUCCESS != (rc = PMI2_Nameserv_publish(service_name, NULL, port_name))) {
OMPI_ERROR_LOG(rc);
return OMPI_ERROR;
@ -56,7 +56,7 @@ static char* lookup ( char *service_name, ompi_info_t *info )
char *port=NULL;
int rc;
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
port = (char*)malloc(1024*sizeof(char)); /* arbitrary size */
if (PMI_SUCCESS != (rc = PMI2_Nameserv_lookup(service_name, NULL, port, 1024))) {
OMPI_ERROR_LOG(rc);
@ -78,7 +78,7 @@ static int unpublish ( char *service_name, ompi_info_t *info )
{
int rc;
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
if (PMI_SUCCESS != (rc = PMI2_Nameserv_unpublish(service_name, NULL))) {
OMPI_ERROR_LOG(rc);
return OMPI_ERROR;

Просмотреть файл

@ -12,10 +12,7 @@
#include "ompi_config.h"
#include <pmi.h>
#if WANT_CRAY_PMI2_EXT
#include <pmi2.h>
#endif
#include "opal/mca/common/pmi/common_pmi.h"
#include "ompi/constants.h"
#include "ompi/mca/rte/rte.h"
@ -74,39 +71,13 @@ static int pubsub_pmi_component_close(void)
static int pubsub_pmi_component_query(mca_base_module_t **module, int *priority)
{
/* for now, only use PMI when direct launched */
if (NULL == ompi_process_info.my_hnp_uri) {
goto cleanup;
if (NULL != ompi_process_info.my_hnp_uri &&
mca_common_pmi_init ()) {
*priority = my_priority;
*module = (mca_base_module_t *)&ompi_pubsub_pmi_module;
return OMPI_SUCCESS;
}
#if WANT_CRAY_PMI2_EXT
{
int spawned, size, rank, appnum;
if (PMI2_Initialized ()) return OMPI_SUCCESS;
if (PMI_SUCCESS != PMI2_Init(&spawned, &size, &rank, &appnum)) {
goto cleanup;
}
}
#else
{
PMI_BOOL initialized;
if (PMI_SUCCESS != PMI_Initialized(&initialized)) {
goto cleanup;
}
if (PMI_TRUE != initialized && PMI_SUCCESS != PMI_Init(&initialized)) {
goto cleanup;
}
}
#endif
/* if PMI is available, use it */
*priority = my_priority;
*module = (mca_base_module_t *)&ompi_pubsub_pmi_module;
return OMPI_SUCCESS;
cleanup:
/* we can't run */
*priority = -1;
*module = NULL;

Просмотреть файл

@ -18,7 +18,7 @@
#include <string.h>
#include <pmi.h>
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
#include <pmi2.h>
#endif
@ -31,7 +31,7 @@ bool mca_common_pmi_init (void) {
return true;
}
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
{
int spawned, size, rank, appnum;
@ -70,7 +70,7 @@ void mca_common_pmi_finalize (void) {
}
if (0 == --mca_common_pmi_init_count) {
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
PMI2_Finalize ();
#else
PMI_Finalize ();

Просмотреть файл

@ -15,7 +15,7 @@
#include <time.h>
#include <string.h>
#include <pmi.h>
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
#include <pmi2.h>
#endif
@ -80,7 +80,7 @@ static int pmi_keylen_max = -1;
*/
static int kvs_put(const char *key, const char *value)
{
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
return PMI2_KVS_Put(key, value);
#else
return PMI_KVS_Put(pmi_kvs_name, key, value);
@ -89,7 +89,7 @@ static int kvs_put(const char *key, const char *value)
static int kvs_get(const char *key, char *value, int valuelen)
{
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
int len;
return PMI2_KVS_Get(pmi_kvs_name, PMI2_ID_NULL, key, value, valuelen, &len);
@ -98,7 +98,7 @@ static int kvs_get(const char *key, char *value, int valuelen)
#endif
}
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
static char escape_char = '$';
static char *illegal = "/;=";
static char *sub = "012";
@ -156,7 +156,7 @@ static int store(const opal_identifier_t *uid,
switch (type) {
case OPAL_STRING:
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
{
/* the blasted Cray PMI implementation marked a number of common
* ASCII characters as "illegal", so if we are on one of those
@ -391,7 +391,7 @@ static char* fetch_string(const char *key)
/* cleanup */
free(tmp_val);
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
{
/* the blasted Cray PMI implementation marked a number of common
* ASCII characters as "illegal", so if we are on one of those
@ -555,7 +555,7 @@ static int setup_pmi(void)
{
int max_length, rc;
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
pmi_vallen_max = PMI2_MAX_VALLEN;
#else
rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max);
@ -565,7 +565,7 @@ static int setup_pmi(void)
}
#endif
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
/* TODO -- is this ok */
max_length = 1024;
#else
@ -579,7 +579,7 @@ static int setup_pmi(void)
return OPAL_ERR_OUT_OF_RESOURCE;
}
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
rc = PMI2_Job_GetId(pmi_kvs_name, max_length);
#else
rc = PMI_KVS_Get_my_name(pmi_kvs_name,max_length);
@ -589,7 +589,7 @@ static int setup_pmi(void)
return OPAL_ERROR;
}
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
pmi_keylen_max = PMI2_MAX_KEYLEN;
#else
if (PMI_SUCCESS != (rc = PMI_KVS_Get_key_length_max(&pmi_keylen_max))) {

Просмотреть файл

@ -18,11 +18,6 @@
#include "orte_config.h"
#include "orte/constants.h"
#include <pmi.h>
#if WANT_CRAY_PMI2_EXT
#include <pmi2.h>
#endif
#include "opal/mca/common/pmi/common_pmi.h"
#include "orte/util/proc_info.h"

Просмотреть файл

@ -13,11 +13,6 @@
#include "orte_config.h"
#include "orte/constants.h"
#include <pmi.h>
#if WANT_CRAY_PMI2_EXT
#include <pmi2.h>
#endif
#include "opal/mca/mca.h"
#include "opal/mca/common/pmi/common_pmi.h"

Просмотреть файл

@ -18,7 +18,7 @@
#include <string.h>
#include <pmi.h>
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
#include <pmi2.h>
#endif
@ -65,7 +65,7 @@ static int init(void)
{
int max_length, rc;
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
/* TODO -- is this ok */
max_length = 1024;
#else
@ -79,7 +79,7 @@ static int init(void)
return ORTE_ERR_OUT_OF_RESOURCE;
}
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
rc = PMI2_Job_GetId(pmi_kvs_name, max_length);
#else
rc = PMI_KVS_Get_my_name(pmi_kvs_name,max_length);
@ -136,8 +136,8 @@ static int pmi_barrier(orte_grpcomm_collective_t *coll)
return ORTE_SUCCESS;
}
#if WANT_CRAY_PMI2_EXT
/* Cray doesn't provide a barrier, so use the Fence function here */
#if WANT_PMI2_SUPPORT
/* PMI2 doesn't provide a barrier, so use the Fence function here */
if (PMI_SUCCESS != (rc = PMI2_KVS_Fence())) {
OPAL_PMI_ERROR(rc, "PMI2_KVS_Fence");
return ORTE_ERROR;
@ -187,7 +187,7 @@ static int modex(orte_grpcomm_collective_t *coll)
/* our RTE data was constructed and pushed in the ESS pmi component */
/* commit our modex info */
#if WANT_CRAY_PMI2_EXT
#if WANT_PMI2_SUPPORT
PMI2_KVS_Fence();
#else
{