1
1

Cray's PMI implementation is quite different from slurm's - they extended PMI-1 by adding some, but not all, of the PMI-2 APIs. So you can't just switch to using PMI-2 functions as it isn't a complete implementation. Instead, you have to selectively figure out which ones they have in PMI-2, and use any missing ones from PMI-1. What fun.

Modify the configure logic and the PMI components to accommodate Cray's approach. Refactor the PMI error reporting code so it resides in only one place. Cray actually decided -not- to define the PMI-2 error codes, so we have to use the PMI-1 codes instead. More fun.

This commit was SVN r25348.
Этот коммит содержится в:
Ralph Castain 2011-10-21 04:54:38 +00:00
родитель e2adc8fa3a
Коммит 3e72fccacf
9 изменённых файлов: 425 добавлений и 257 удалений

Просмотреть файл

@ -11,24 +11,19 @@
#include "ompi/constants.h"
#include <pmi.h>
#if WANT_CRAY_PMI2_EXT
#include <pmi2.h>
#endif
#include "ompi/info/info.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "ompi/mca/pubsub/base/base.h"
#include "pubsub_pmi.h"
static char* pmi_error(int pmi_err);
/*
 * Report a failed PMI call through opal_output(): prints the process
 * name, the source location, the name of the PMI call (pmi_func) and
 * the message pmi_error() returns for pmi_err.
 *
 * There is deliberately no semicolon after while(0): the caller
 * supplies it, so the macro expands to exactly one statement and stays
 * safe inside an unbraced if/else.
 */
#define ORTE_PMI_ERROR(pmi_err, pmi_func)                       \
    do {                                                        \
        opal_output(0, "%s[%s:%d:%s] %s: %s\n",                 \
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),         \
                    __FILE__, __LINE__, __func__,               \
                    pmi_func, pmi_error(pmi_err));              \
    } while(0)
/*
* Init the module
*/
@ -44,11 +39,17 @@ static int publish ( char *service_name, ompi_info_t *info, char *port_name )
{
    int rc;

    /* Publish port_name under service_name through whichever PMI
     * name-service call this build uses. The info argument is not
     * consulted. Returns OMPI_SUCCESS, or OMPI_ERROR after logging
     * the PMI failure. */
#if WANT_CRAY_PMI2_EXT
    /* Cray does not define the PMI-2 error codes, so the PMI-1
     * success code is the one to compare against. */
    if (PMI_SUCCESS != (rc = PMI2_Nameserv_publish(service_name, NULL, port_name))) {
        ORTE_PMI_ERROR(rc, "PMI2_Nameserv_publish");
        return OMPI_ERROR;
    }
#else
    if (PMI_SUCCESS != (rc = PMI_Publish_name(service_name, port_name))) {
        /* name the call that actually failed */
        ORTE_PMI_ERROR(rc, "PMI_Publish_name");
        return OMPI_ERROR;
    }
#endif
    return OMPI_SUCCESS;
}
@ -57,11 +58,19 @@ static char* lookup ( char *service_name, ompi_info_t *info )
char *port=NULL;
int rc;
#if WANT_CRAY_PMI2_EXT
port = (char*)malloc(1024*sizeof(char)); /* arbitrary size */
if (PMI2_SUCCESS != (rc = PMI2_Nameserv_lookup(service_name, NULL, port, 1024))) {
ORTE_PMI_ERROR(rc, "PMI2_Nameserv_lookup");
free(port);
return OMPI_ERROR;
}
#else
if (PMI_SUCCESS != (rc = PMI_Lookup_name(service_name, port))) {
ORTE_PMI_ERROR(rc, "PMI_Lookup_name");
return NULL;
}
#endif
return port;
}
@ -71,10 +80,17 @@ static int unpublish ( char *service_name, ompi_info_t *info )
{
    int rc;

    /* Withdraw service_name through whichever PMI name-service call
     * this build uses. The info argument is not consulted. Returns
     * OMPI_SUCCESS, or OMPI_ERROR after logging the PMI failure. */
#if WANT_CRAY_PMI2_EXT
    if (PMI_SUCCESS != (rc = PMI2_Nameserv_unpublish(service_name, NULL))) {
        ORTE_PMI_ERROR(rc, "PMI2_Nameserv_unpublish");
        return OMPI_ERROR;
    }
#else
    if (PMI_SUCCESS != (rc = PMI_Unpublish_name(service_name))) {
        ORTE_PMI_ERROR(rc, "PMI_Unpublish_name");
        return OMPI_ERROR;
    }
#endif
    return OMPI_SUCCESS;
}
@ -97,34 +113,3 @@ ompi_pubsub_base_module_t ompi_pubsub_pmi_module = {
lookup,
finalize
};
/* useful util */
/*
 * Map a PMI return code to a short description for log output.
 *
 * pmi_err: a code returned by a PMI call.
 * Returns a pointer to a string literal (do not modify or free it);
 * codes that are not listed yield "Unknown error".
 */
static char* pmi_error(int pmi_err)
{
    char * err_msg;

    switch(pmi_err) {
    case PMI_FAIL: err_msg = "Operation failed"; break;
    case PMI_ERR_INIT: err_msg = "PMI is not initialized"; break;
    case PMI_ERR_NOMEM: err_msg = "Input buffer not large enough"; break;
    case PMI_ERR_INVALID_ARG: err_msg = "Invalid argument"; break;
    case PMI_ERR_INVALID_KEY: err_msg = "Invalid key argument"; break;
    case PMI_ERR_INVALID_KEY_LENGTH: err_msg = "Invalid key length argument"; break;
    case PMI_ERR_INVALID_VAL: err_msg = "Invalid value argument"; break;
    case PMI_ERR_INVALID_VAL_LENGTH: err_msg = "Invalid value length argument"; break;
    case PMI_ERR_INVALID_LENGTH: err_msg = "Invalid length argument"; break;
    case PMI_ERR_INVALID_NUM_ARGS: err_msg = "Invalid number of arguments"; break;
    case PMI_ERR_INVALID_ARGS: err_msg = "Invalid args argument"; break;
    case PMI_ERR_INVALID_NUM_PARSED: err_msg = "Invalid num_parsed length argument"; break;
    case PMI_ERR_INVALID_KEYVALP: err_msg = "Invalid keyvalp argument"; break;
    case PMI_ERR_INVALID_SIZE: err_msg = "Invalid size argument"; break;
#if defined(PMI_ERR_INVALID_KVS)
    /* pmi.h lists this as a valid return code, but mpich does not define it (slurm does) */
    case PMI_ERR_INVALID_KVS: err_msg = "Invalid kvs argument"; break;
#endif
    case PMI_SUCCESS: err_msg = "Success"; break;
    default: err_msg = "Unknown error"; break;
    }
    return err_msg;
}

Просмотреть файл

@ -11,6 +11,9 @@
#include "ompi/constants.h"
#include <pmi.h>
#if WANT_CRAY_PMI2_EXT
#include <pmi2.h>
#endif
#include "orte/util/proc_info.h"
@ -46,39 +49,62 @@ static int pubsub_pmi_component_open(void)
/*
 * Component close hook: shut PMI down if it is currently initialized.
 * Always returns OMPI_SUCCESS.
 */
static int pubsub_pmi_component_close(void)
{
#if WANT_CRAY_PMI2_EXT
    if (PMI2_Initialized()) {
        PMI2_Finalize();
    }
#else
    PMI_BOOL pmi_is_up;

    /* finalize only when the query itself succeeded and reported
     * that PMI is running */
    if (PMI_SUCCESS == PMI_Initialized(&pmi_is_up)) {
        if (PMI_TRUE == pmi_is_up) {
            PMI_Finalize();
        }
    }
#endif
    return OMPI_SUCCESS;
}
/*
 * Make sure PMI is running, starting it if necessary.
 *
 * Returns true when PMI was already initialized or was initialized
 * here, false when the state query or the init call failed.
 */
static bool pmi_startup(void)
{
#if WANT_CRAY_PMI2_EXT
    int spawned, size, rank, appnum;

    if (PMI2_Initialized()) {
        /* already initialized */
        return true;
    }
    /* if we can't startup PMI, we can't be used */
    if (PMI_SUCCESS != PMI2_Init(&spawned, &size, &rank, &appnum)) {
        return false;
    }
    /* ignore the info - we'll pick it up elsewhere */
    return true;
#else
    PMI_BOOL initialized;
    int spawned;

    /* ask for the current state first; calling PMI_Init here instead
     * would start PMI and then possibly start it a second time below */
    if (PMI_SUCCESS != PMI_Initialized(&initialized)) {
        return false;
    }
    if (PMI_TRUE != initialized) {
        /* the spawned flag is not used by this function */
        if (PMI_SUCCESS != PMI_Init(&spawned)) {
            return false;
        }
    }
    return true;
#endif
}
static int pubsub_pmi_component_query(mca_base_module_t **module, int *priority)
{
int spawned;
PMI_BOOL initialized;
/* for now, only use PMI when direct launched */
if (NULL == orte_process_info.my_hnp_uri &&
PMI_SUCCESS == PMI_Initialized(&initialized)) {
/* if we aren't already initialized, then try */
if (PMI_TRUE != initialized) {
/* if we can't startup the PMI, we can't be used */
if (PMI_SUCCESS != PMI_Init(&spawned)) {
*priority = -1;
*module = NULL;
return OMPI_ERROR;
}
}
/* if we were able to startup PMI, or it was already
* running, then use us
*/
pmi_startup()) {
/* if PMI is available, use it */
*priority = 100;
*module = (mca_base_module_t *)&ompi_pubsub_pmi_module;
return OMPI_SUCCESS;
return ORTE_SUCCESS;
}
/* we can't run */

Просмотреть файл

@ -27,8 +27,13 @@ AC_DEFUN([ORTE_CHECK_PMI],[
[AC_HELP_STRING([--with-pmi],
[Build PMI support (default: no)])],
[], with_pmi=no)
AC_ARG_WITH([cray-pmi2-ext],
    [AC_HELP_STRING([--with-cray-pmi2-ext],
        [Include Cray PMI2 extensions (default: no)])],
    [], with_cray_pmi2_ext=no)
orte_enable_pmi=0
orte_use_cray_pmi2_ext=0
# WANT_CRAY_PMI2_EXT is defined from orte_use_pmi2_ext, so give it a
# value up front; otherwise the define is empty whenever the Cray
# check below is not reached.
orte_use_pmi2_ext=0
# save flags
orte_check_pmi_$1_save_CPPFLAGS="$CPPFLAGS"
@ -42,7 +47,8 @@ AC_DEFUN([ORTE_CHECK_PMI],[
AC_MSG_CHECKING([if user requested PMI support])
AS_IF([test "$with_pmi" = "no"],
[AC_MSG_RESULT([no])
orte_want_pmi_support=no],
orte_want_pmi_support=no
orte_use_cray_pmi2_ext=0],
[AC_MSG_RESULT([yes])
orte_want_pmi_support=yes
AC_MSG_CHECKING([if PMI support installed])
@ -53,13 +59,13 @@ AC_DEFUN([ORTE_CHECK_PMI],[
[AS_IF([test -d "$with_pmi/lib64"],
[orte_check_pmi_$1_LDFLAGS="-L$with_pmi/lib64"],
[orte_check_pmi_$1_LDFLAGS="-L$with_pmi/lib"])
AS_IF([test -f "$with_pmi/include/pmi.h"],
[orte_check_pmi_$1_CPPFLAGS="-I$with_pmi/include"],
[AS_IF([test -f "$with_pmi/include/slurm/pmi.h"],
[orte_check_pmi_$1_CPPFLAGS="-I$with_pmi/include/slurm"],
[])])],
AS_IF([test -f "$with_pmi/include/pmi.h"],
[orte_check_pmi_$1_CPPFLAGS="-I$with_pmi/include"],
[AS_IF([test -f "$with_pmi/include/slurm/pmi.h"],
[orte_check_pmi_$1_CPPFLAGS="-I$with_pmi/include/slurm"])])],
[AS_IF([test -f "/usr/include/slurm/pmi.h"],
[orte_check_pmi_$1_CPPFLAGS="-I/usr/include/slurm"])])
LDFLAGS="$LDFLAGS $orte_check_pmi_$1_LDFLAGS"
CPPFLAGS="$CPPFLAGS $orte_check_pmi_$1_CPPFLAGS"
LIBS="$LIBS -lpmi"
@ -78,15 +84,35 @@ AC_DEFUN([ORTE_CHECK_PMI],[
[AC_MSG_RESULT([no])
AC_MSG_WARN([PMI support requested (via --with-pmi) but not found.])
AC_MSG_ERROR([Aborting.])
$3])])
$3])
# restore flags
CPPFLAGS="$orte_check_pmi_$1_save_CPPFLAGS"
AC_MSG_CHECKING([if user requested Cray PMI2 extensions])
AS_IF([test "$with_cray_pmi2_ext" = "no"],
[AC_MSG_RESULT([no])
orte_use_pmi2_ext=0],
[AC_MSG_RESULT([yes])
# check to see if pmi2.h header is present. if it is, then we
# will use some of the functions in it.
AC_MSG_CHECKING([if PMI2 extensions installed])
AS_IF([test -f "$with_pmi/include/pmi2.h"],
[orte_use_pmi2_ext=1],
[AC_MSG_RESULT([no])
AC_MSG_WARN([PMI2 extensions requested (via --with-cray-pmi2-ext) but not found.])
AC_MSG_ERROR([Aborting.])
orte_use_pmi2_ext=0
orte_enable_pmi=0
$3])])])
# restore flags - have to add CPPFLAGS so base functions can find pmi.h
CPPFLAGS="$orte_check_pmi_$1_save_CPPFLAGS $orte_check_pmi_$1_CPPFLAGS"
LDFLAGS="$orte_check_pmi_$1_save_LDFLAGS"
LIBS="$orte_check_pmi_$1_save_LIBS"
AC_DEFINE_UNQUOTED([WANT_PMI_SUPPORT],
[$orte_enable_pmi],
[Whether we want PMI support])
AC_DEFINE_UNQUOTED([WANT_CRAY_PMI2_EXT],
[$orte_use_pmi2_ext],
[Whether we want to use Cray PMI2 extensions])
AM_CONDITIONAL(WANT_PMI_SUPPORT, [test "$orte_enable_pmi" = 1])
])

Просмотреть файл

@ -45,6 +45,10 @@
#include <stdlib.h>
#include <stdarg.h>
#if WANT_PMI_SUPPORT
#include <pmi.h>
#endif
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
@ -209,6 +213,39 @@ void orte_errmgr_base_log(int error_code, char *filename, int line)
}
}
#if WANT_PMI_SUPPORT
/* useful util */
/*
 * Map a PMI return code to a short description for log output.
 *
 * pmi_err: a code returned by a PMI call.
 * Returns a pointer to a string literal (do not modify or free it);
 * codes that are not listed yield "Unknown error".
 */
char* orte_errmgr_base_pmi_error(int pmi_err)
{
    char * err_msg;

    switch(pmi_err) {
    case PMI_FAIL: err_msg = "Operation failed"; break;
    case PMI_ERR_INIT: err_msg = "PMI is not initialized"; break;
    case PMI_ERR_NOMEM: err_msg = "Input buffer not large enough"; break;
    case PMI_ERR_INVALID_ARG: err_msg = "Invalid argument"; break;
    case PMI_ERR_INVALID_KEY: err_msg = "Invalid key argument"; break;
    case PMI_ERR_INVALID_KEY_LENGTH: err_msg = "Invalid key length argument"; break;
    case PMI_ERR_INVALID_VAL: err_msg = "Invalid value argument"; break;
    case PMI_ERR_INVALID_VAL_LENGTH: err_msg = "Invalid value length argument"; break;
    case PMI_ERR_INVALID_LENGTH: err_msg = "Invalid length argument"; break;
    case PMI_ERR_INVALID_NUM_ARGS: err_msg = "Invalid number of arguments"; break;
    case PMI_ERR_INVALID_ARGS: err_msg = "Invalid args argument"; break;
    case PMI_ERR_INVALID_NUM_PARSED: err_msg = "Invalid num_parsed length argument"; break;
    case PMI_ERR_INVALID_KEYVALP: err_msg = "Invalid keyvalp argument"; break;
    case PMI_ERR_INVALID_SIZE: err_msg = "Invalid size argument"; break;
#if defined(PMI_ERR_INVALID_KVS)
    /* pmi.h lists this as a valid return code, but mpich does not define it (slurm does) */
    case PMI_ERR_INVALID_KVS: err_msg = "Invalid kvs argument"; break;
#endif
    case PMI_SUCCESS: err_msg = "Success"; break;
    default: err_msg = "Unknown error"; break;
    }
    return err_msg;
}
#endif
void orte_errmgr_base_abort(int error_code, char *fmt, ...)
{
va_list arglist;

Просмотреть файл

@ -150,6 +150,16 @@ OBJ_CLASS_DECLARATION(orte_errmgr_predicted_map_t);
#define ORTE_ERROR_LOG(n) \
orte_errmgr.log(n, __FILE__, __LINE__);
#if WANT_PMI_SUPPORT
/*
 * Report a failed PMI call through opal_output(): prints the process
 * name, the source location, the name of the PMI call (pmi_func) and
 * the message orte_errmgr_base_pmi_error() returns for pmi_err.
 *
 * There is deliberately no semicolon after while(0): the caller
 * supplies it, so the macro expands to exactly one statement and stays
 * safe inside an unbraced if/else.
 */
#define ORTE_PMI_ERROR(pmi_err, pmi_func)                               \
    do {                                                                \
        opal_output(0, "%s[%s:%d:%s] %s: %s\n",                         \
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),                 \
                    __FILE__, __LINE__, __func__,                       \
                    pmi_func, orte_errmgr_base_pmi_error(pmi_err));     \
    } while(0)
/* Translates a PMI return code into a printable message. */
OPAL_DECLSPEC char* orte_errmgr_base_pmi_error(int pmi_err);
#endif
/*
* Framework Interfaces

Просмотреть файл

@ -1,5 +1,7 @@
/*
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC. All
* rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -17,6 +19,9 @@
#include "orte/constants.h"
#include <pmi.h>
#if WANT_CRAY_PMI2_EXT
#include <pmi2.h>
#endif
#include "orte/util/proc_info.h"
@ -60,28 +65,46 @@ static int pmi_component_open(void)
return ORTE_SUCCESS;
}
/*
 * Make sure PMI is running, starting it if necessary.
 *
 * Returns true when PMI was already initialized or was initialized
 * here, false when the state query or the init call failed.
 */
static bool pmi_startup(void)
{
#if WANT_CRAY_PMI2_EXT
    int spawned, size, rank, appnum;

    /* nothing to do when PMI2 is already up */
    if (PMI2_Initialized()) {
        return true;
    }
    /* otherwise the outcome of the init call decides; the values it
     * reports are not used here */
    return (PMI_SUCCESS == PMI2_Init(&spawned, &size, &rank, &appnum));
#else
    PMI_BOOL up;

    /* a failed state query means PMI cannot be used */
    if (PMI_SUCCESS != PMI_Initialized(&up)) {
        return false;
    }
    if (PMI_TRUE == up) {
        return true;
    }
    /* not running yet: the outcome of the init call decides */
    return (PMI_SUCCESS == PMI_Init(&up));
#endif
}
static int pmi_component_query(mca_base_module_t **module, int *priority)
{
int spawned;
PMI_BOOL initialized;
/* for now, only use PMI when direct launched */
if (!ORTE_PROC_IS_HNP &&
NULL == orte_process_info.my_hnp_uri &&
PMI_SUCCESS == PMI_Initialized(&initialized)) {
if (PMI_TRUE != initialized) {
/* if we can't startup the PMI, we can't be used */
if (PMI_SUCCESS != PMI_Init(&spawned)) {
*priority = -1;
*module = NULL;
return ORTE_ERROR;
}
/* if PMI is available, use it */
*priority = 100;
*module = (mca_base_module_t *)&orte_ess_pmi_module;
return ORTE_SUCCESS;
}
pmi_startup()) {
/* if PMI is available, use it */
*priority = 100;
*module = (mca_base_module_t *)&orte_ess_pmi_module;
return ORTE_SUCCESS;
}
/* we can't run */
@ -93,6 +116,11 @@ static int pmi_component_query(mca_base_module_t **module, int *priority)
static int pmi_component_close(void)
{
#if WANT_CRAY_PMI2_EXT
if (PMI2_Initialized()) {
PMI2_Finalize();
}
#else
PMI_BOOL initialized;
/* if we weren't selected, cleanup */
@ -100,6 +128,7 @@ static int pmi_component_close(void)
PMI_TRUE == initialized) {
PMI_Finalize();
}
#endif
return ORTE_SUCCESS;
}

Просмотреть файл

@ -80,14 +80,6 @@ orte_ess_base_module_t orte_ess_pmi_module = {
static bool app_init_complete=false;
static int pmi_maxlen=0;
static char* pmi_error(int pmi_err);
/*
 * Report a failed PMI call through opal_output(): prints the process
 * name, the source location, the name of the PMI call (pmi_func) and
 * the message pmi_error() returns for pmi_err.
 *
 * There is deliberately no semicolon after while(0): the caller
 * supplies it, so the macro expands to exactly one statement and stays
 * safe inside an unbraced if/else.
 */
#define ORTE_PMI_ERROR(pmi_err, pmi_func)                       \
    do {                                                        \
        opal_output(0, "%s[%s:%d:%s] %s: %s\n",                 \
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),         \
                    __FILE__, __LINE__, __func__,               \
                    pmi_func, pmi_error(pmi_err));              \
    } while(0)
/**** MODULE FUNCTIONS ****/
@ -307,33 +299,3 @@ static void rte_abort(int error_code, bool report)
{
orte_ess_base_app_abort(error_code, report);
}
/* useful util */
/*
 * Map a PMI return code to a short description for log output.
 *
 * pmi_err: a code returned by a PMI call.
 * Returns a pointer to a string literal (do not modify or free it);
 * codes that are not listed yield "Unknown error".
 */
static char* pmi_error(int pmi_err)
{
    char * err_msg;

    switch(pmi_err) {
    case PMI_FAIL: err_msg = "Operation failed"; break;
    case PMI_ERR_INIT: err_msg = "PMI is not initialized"; break;
    case PMI_ERR_NOMEM: err_msg = "Input buffer not large enough"; break;
    case PMI_ERR_INVALID_ARG: err_msg = "Invalid argument"; break;
    case PMI_ERR_INVALID_KEY: err_msg = "Invalid key argument"; break;
    case PMI_ERR_INVALID_KEY_LENGTH: err_msg = "Invalid key length argument"; break;
    case PMI_ERR_INVALID_VAL: err_msg = "Invalid value argument"; break;
    case PMI_ERR_INVALID_VAL_LENGTH: err_msg = "Invalid value length argument"; break;
    case PMI_ERR_INVALID_LENGTH: err_msg = "Invalid length argument"; break;
    case PMI_ERR_INVALID_NUM_ARGS: err_msg = "Invalid number of arguments"; break;
    case PMI_ERR_INVALID_ARGS: err_msg = "Invalid args argument"; break;
    case PMI_ERR_INVALID_NUM_PARSED: err_msg = "Invalid num_parsed length argument"; break;
    case PMI_ERR_INVALID_KEYVALP: err_msg = "Invalid keyvalp argument"; break;
    case PMI_ERR_INVALID_SIZE: err_msg = "Invalid size argument"; break;
#if defined(PMI_ERR_INVALID_KVS)
    /* pmi.h lists this as a valid return code, but mpich does not define it (slurm does) */
    case PMI_ERR_INVALID_KVS: err_msg = "Invalid kvs argument"; break;
#endif
    case PMI_SUCCESS: err_msg = "Success"; break;
    default: err_msg = "Unknown error"; break;
    }
    return err_msg;
}

Просмотреть файл

@ -1,17 +1,22 @@
/* -*- C -*-
*
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
*
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC. All
* rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <pmi.h>
#if WANT_CRAY_PMI2_EXT
#include <pmi2.h>
#endif
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"
@ -50,40 +55,63 @@ int orte_grpcomm_pmi_open(void)
/*
 * Component close hook: shut PMI down if it is currently initialized.
 * Always returns ORTE_SUCCESS.
 */
int orte_grpcomm_pmi_close(void)
{
#if WANT_CRAY_PMI2_EXT
    if (PMI2_Initialized()) {
        PMI2_Finalize();
    }
#else
    PMI_BOOL pmi_is_up;

    /* finalize only when the query itself succeeded and reported
     * that PMI is running */
    if (PMI_SUCCESS == PMI_Initialized(&pmi_is_up)) {
        if (PMI_TRUE == pmi_is_up) {
            PMI_Finalize();
        }
    }
#endif
    return ORTE_SUCCESS;
}
/*
 * Make sure PMI is running, starting it if necessary.
 *
 * Returns true when PMI was already initialized or was initialized
 * here, false when the state query or the init call failed.
 */
static bool pmi_startup(void)
{
#if WANT_CRAY_PMI2_EXT
    int spawned, size, rank, appnum;

    if (PMI2_Initialized()) {
        /* already initialized */
        return true;
    }
    /* if we can't startup PMI, we can't be used */
    if (PMI_SUCCESS != PMI2_Init(&spawned, &size, &rank, &appnum)) {
        return false;
    }
    /* ignore the info - we'll pick it up elsewhere */
    return true;
#else
    PMI_BOOL initialized;
    int spawned;

    /* ask for the current state first; calling PMI_Init here instead
     * would start PMI and then possibly start it a second time below */
    if (PMI_SUCCESS != PMI_Initialized(&initialized)) {
        return false;
    }
    if (PMI_TRUE != initialized) {
        /* the spawned flag is not used by this function */
        if (PMI_SUCCESS != PMI_Init(&spawned)) {
            return false;
        }
    }
    return true;
#endif
}
int orte_grpcomm_pmi_component_query(mca_base_module_t **module, int *priority)
{
int spawned;
PMI_BOOL initialized;
/* for now, only use PMI when direct launched */
if (!ORTE_PROC_IS_HNP &&
NULL == orte_process_info.my_hnp_uri &&
PMI_SUCCESS == PMI_Initialized(&initialized)) {
/* if we aren't already initialized, then try */
if (PMI_TRUE != initialized) {
/* if we can't startup the PMI, we can't be used */
if (PMI_SUCCESS != PMI_Init(&spawned)) {
*priority = -1;
*module = NULL;
return ORTE_ERROR;
}
}
/* if we were able to startup PMI, or it was already
* running, then use us
*/
pmi_startup()) {
/* if PMI is available, use it */
*priority = 100;
*module = (mca_base_module_t *)&orte_grpcomm_pmi_module;
return ORTE_SUCCESS;
return ORTE_SUCCESS;
}
/* we can't run */

Просмотреть файл

@ -2,6 +2,8 @@
* Copyright (c) 2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC. All
* rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -15,6 +17,9 @@
#include <string.h>
#include <pmi.h>
#if WANT_CRAY_PMI2_EXT
#include <pmi2.h>
#endif
#include "opal/dss/dss.h"
#include "opal/mca/hwloc/base/base.h"
@ -62,14 +67,6 @@ orte_grpcomm_base_module_t orte_grpcomm_pmi_module = {
static int pmi_encode(const void *val, size_t vallen);
static void* pmi_decode(size_t *retlen);
static char* pmi_error(int pmi_err);
/*
 * Report a failed PMI call through opal_output(): prints the process
 * name, the source location, the name of the PMI call (pmi_func) and
 * the message pmi_error() returns for pmi_err.
 *
 * There is deliberately no semicolon after while(0): the caller
 * supplies it, so the macro expands to exactly one statement and stays
 * safe inside an unbraced if/else.
 */
#define ORTE_PMI_ERROR(pmi_err, pmi_func)                       \
    do {                                                        \
        opal_output(0, "%s[%s:%d:%s] %s: %s\n",                 \
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),         \
                    __FILE__, __LINE__, __func__,               \
                    pmi_func, pmi_error(pmi_err));              \
    } while(0)
static int setup_pmi(void);
static int setup_key(const orte_process_name_t *name, const char *key);
@ -80,6 +77,45 @@ static char *pmi_attr_val = NULL;
static int pmi_vallen_max = -1;
static int pmi_keylen_max = -1;
/* Because Cray uses PMI2 extensions for some, but not all,
* PMI functions, we define a set of wrappers for those
* common functions we will use
*/
/*
 * Store a key/value pair through the PMI put call this build uses.
 * The PMI return code is passed back unchanged.
 */
static int kvs_put(const char *key, const char *value)
{
    int status;

#if WANT_CRAY_PMI2_EXT
    status = PMI2_KVS_Put(key, value);
#else
    /* the PMI-1 call also takes the KVS name held in pmi_kvs_name */
    status = PMI_KVS_Put(pmi_kvs_name, key, value);
#endif
    return status;
}
/*
 * Fetch the value stored under key into value (valuelen is passed on
 * as the buffer size) through the PMI get call this build uses.
 * The PMI return code is passed back unchanged.
 */
static int kvs_get(const char *key, char *value, int valuelen)
{
    int status;
#if WANT_CRAY_PMI2_EXT
    /* PMI2_KVS_Get stores a value through its last argument; it is
     * not used afterwards */
    int reported;

    status = PMI2_KVS_Get(pmi_kvs_name, PMI2_ID_NULL, key, value, valuelen, &reported);
#else
    status = PMI_KVS_Get(pmi_kvs_name, key, value, valuelen);
#endif
    return status;
}
/*
 * Commit the key/value pairs put so far.
 *
 * With the Cray PMI2 extensions this is a single PMI2_KVS_Fence call.
 * Otherwise the KVS named by pmi_kvs_name is committed and, if that
 * succeeds, PMI_Barrier is called. The PMI return code of the failing
 * (or last) call is passed back.
 */
static int kvs_commit(void)
{
#if WANT_CRAY_PMI2_EXT
    return PMI2_KVS_Fence();
#else
    int rc;

    if (PMI_SUCCESS != (rc = PMI_KVS_Commit(pmi_kvs_name))) {
        return rc;
    }
    /* Barrier here to ensure all other procs have committed */
    return PMI_Barrier();
#endif
}
/**
* Initialize the module
*/
@ -146,11 +182,19 @@ static int pmi_barrier(void)
return ORTE_SUCCESS;
}
#if WANT_CRAY_PMI2_EXT
/* Cray doesn't provide a barrier, so use the Fence function here */
if (PMI_SUCCESS != (rc = PMI2_KVS_Fence())) {
ORTE_PMI_ERROR(rc, "PMI2_KVS_Fence");
return ORTE_ERROR;
}
#else
/* use the PMI barrier function */
if (PMI_SUCCESS != (rc = PMI_Barrier())) {
ORTE_PMI_ERROR(rc, "PMI_Barrier");
return ORTE_ERROR;
}
#endif
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi barrier complete",
@ -201,7 +245,7 @@ static int pmi_set_proc_attr(const char* attr_name,
return rc;
}
rc = PMI_KVS_Put(pmi_kvs_name, pmi_kvs_key, pmi_attr_val);
rc = kvs_put(pmi_kvs_key, pmi_attr_val);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
return ORTE_ERROR;
@ -237,7 +281,7 @@ static int pmi_get_proc_attr(const orte_process_name_t name,
return rc;
}
rc = PMI_KVS_Get(pmi_kvs_name, pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get");
return ORTE_ERROR;
@ -259,6 +303,7 @@ static int pmi_get_proc_attr(const orte_process_name_t name,
static int modex(opal_list_t *procs)
{
int rc, i;
size_t len;
char *rml_uri, val[64];
orte_vpid_t v;
orte_process_name_t name;
@ -286,7 +331,7 @@ static int modex(opal_list_t *procs)
ORTE_ERROR_LOG(rc);
return rc;
}
rc = PMI_KVS_Put(pmi_kvs_name, pmi_kvs_key, orte_process_info.nodename);
rc = kvs_put(pmi_kvs_key, orte_process_info.nodename);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
return ORTE_ERROR;
@ -302,9 +347,17 @@ static int modex(opal_list_t *procs)
}
if (ORTE_SUCCESS != (rc = setup_key(ORTE_PROC_MY_NAME, "RMLURI"))) {
ORTE_ERROR_LOG(rc);
free(rml_uri);
return rc;
}
rc = PMI_KVS_Put(pmi_kvs_name, pmi_kvs_key, rml_uri);
/* NTH: some characters are not allowed in pmi2 land so we need to encode */
if (ORTE_SUCCESS != (rc = pmi_encode(rml_uri, strlen(rml_uri)))) {
ORTE_ERROR_LOG(rc);
free(rml_uri);
return rc;
}
/* encoding puts the encoded value in pmi_attr_val */
rc = kvs_put(pmi_kvs_key, pmi_attr_val);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
free(rml_uri);
@ -327,13 +380,22 @@ static int modex(opal_list_t *procs)
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi LOCALE %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), locale));
/* NTH: some characters are not allowed in pmi2 land - not sure
* if hwloc would use them, but just to be safe we need to encode
*/
if (ORTE_SUCCESS != (rc = pmi_encode(locale, strlen(locale)))) {
ORTE_ERROR_LOG(rc);
free(locale);
return rc;
}
/* get the key */
if (ORTE_SUCCESS != (rc = setup_key(ORTE_PROC_MY_NAME, "HWLOC"))) {
ORTE_ERROR_LOG(rc);
free(locale);
return rc;
}
/* enter the key-value */
rc = PMI_KVS_Put(pmi_kvs_name, pmi_kvs_key, locale);
/* encoding puts the encoded value in pmi_attr_val */
rc = kvs_put(pmi_kvs_key, pmi_attr_val);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
free(locale);
@ -355,7 +417,7 @@ static int modex(opal_list_t *procs)
return rc;
}
snprintf(val, 64, "%lu", (unsigned long)pmap->local_rank);
rc = PMI_KVS_Put(pmi_kvs_name, pmi_kvs_key, val);
rc = kvs_put(pmi_kvs_key, val);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
return ORTE_ERROR;
@ -365,23 +427,18 @@ static int modex(opal_list_t *procs)
return rc;
}
snprintf(val, 64, "%lu", (unsigned long)pmap->node_rank);
rc = PMI_KVS_Put(pmi_kvs_name, pmi_kvs_key, val);
rc = kvs_put(pmi_kvs_key, val);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Put");
return ORTE_ERROR;
}
/* commit our modex info */
if (PMI_SUCCESS != (rc = PMI_KVS_Commit(pmi_kvs_name))) {
if (PMI_SUCCESS != (rc = kvs_commit())) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Commit failed");
return ORTE_ERROR;
}
/* Barrier here to ensure all other procs have committed */
if (ORTE_SUCCESS != (rc = pmi_barrier())) {
return rc;
}
/* harvest the oob endpoint info and hostname for all other procs
* in our job so oob wireup can be completed and we
* can setup their nidmap/pidmap
@ -393,28 +450,37 @@ static int modex(opal_list_t *procs)
continue;
}
name.vpid = v;
if (ORTE_SUCCESS != (rc = setup_key(&name, "RMLURI"))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = PMI_KVS_Get(pmi_kvs_name, pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get");
return ORTE_ERROR;
}
/* Had to encode to protect against pmi2-prohibited chars */
rml_uri = pmi_decode(&len);
if (NULL == rml_uri) {
return ORTE_ERROR;
}
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi: proc %s oob endpoint %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name), pmi_attr_val));
ORTE_NAME_PRINT(&name), rml_uri));
/* set the contact info into the hash table */
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(pmi_attr_val))) {
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(rml_uri))) {
free(rml_uri);
return rc;
}
free(rml_uri);
if (ORTE_SUCCESS != (rc = setup_key(&name, "HOSTNAME"))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = PMI_KVS_Get(pmi_kvs_name, pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get");
return ORTE_ERROR;
@ -459,22 +525,22 @@ static int modex(opal_list_t *procs)
ORTE_ERROR_LOG(rc);
return rc;
}
rc = PMI_KVS_Get(pmi_kvs_name, pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get");
return ORTE_ERROR;
}
pmap->local_rank = (uint16_t)strtoul(pmi_attr_val, NULL, 10);
pmap->local_rank = (orte_local_rank_t)strtoul(pmi_attr_val, NULL, 10);
if (ORTE_SUCCESS != (rc = setup_key(&name, "NODERANK"))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = PMI_KVS_Get(pmi_kvs_name, pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get");
return ORTE_ERROR;
}
pmap->node_rank = (uint16_t)strtoul(pmi_attr_val, NULL, 10);
pmap->node_rank = (orte_node_rank_t)strtoul(pmi_attr_val, NULL, 10);
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi: proc %s lrank %u nrank %u",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@ -482,55 +548,66 @@ static int modex(opal_list_t *procs)
(unsigned int)pmap->local_rank,
(unsigned int)pmap->node_rank));
#if OPAL_HAVE_HWLOC
/* get the proc's locality info, if available */
if (ORTE_SUCCESS != (rc = setup_key(&name, "HWLOC"))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = PMI_KVS_Get(pmi_kvs_name, pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
/* don't error out here - if not found, that's okay */
if (PMI_SUCCESS == rc) {
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &name, ORTE_PROC_MY_NAME)) {
/* if this data is from myself, then set locality to all */
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi setting proc %s locale ALL",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name)));
pmap->locality = OPAL_PROC_ALL_LOCAL;
} else if (loc->daemon != ORTE_PROC_MY_DAEMON->vpid) {
/* this is on a different node, then mark as non-local */
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi setting proc %s locale NONLOCAL",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name)));
pmap->locality = OPAL_PROC_NON_LOCAL;
} else if (0 == strlen(pmi_attr_val)){
/* if we share a node, but we don't know anything more, then
* mark us as on the node as this is all we know
*/
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi setting proc %s locale NODE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name)));
pmap->locality = OPAL_PROC_ON_NODE;
} else {
/* convert the locale to a cpuset */
if (NULL == orte_grpcomm_base.working_cpuset) {
orte_grpcomm_base.working_cpuset = hwloc_bitmap_alloc();
{
char *locale;
/* get the proc's locality info, if available */
if (ORTE_SUCCESS != (rc = setup_key(&name, "HWLOC"))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = kvs_get(pmi_kvs_key, pmi_attr_val, pmi_vallen_max);
/* don't error out here - if not found, that's okay */
if (PMI_SUCCESS == rc) {
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &name, ORTE_PROC_MY_NAME)) {
/* if this data is from myself, then set locality to all */
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi setting proc %s locale ALL",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name)));
pmap->locality = OPAL_PROC_ALL_LOCAL;
} else if (loc->daemon != ORTE_PROC_MY_DAEMON->vpid) {
/* this is on a different node, then mark as non-local */
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi setting proc %s locale NONLOCAL",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name)));
pmap->locality = OPAL_PROC_NON_LOCAL;
} else if (0 == strlen(pmi_attr_val)){
/* if we share a node, but we don't know anything more, then
* mark us as on the node as this is all we know
*/
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcomm:pmi setting proc %s locale NODE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name)));
pmap->locality = OPAL_PROC_ON_NODE;
} else {
/* we encoded to protect against pmi2 restrictions */
locale = pmi_decode(&len);
if (NULL == locale) {
return ORTE_ERROR;
}
/* convert the locale to a cpuset */
if (NULL == orte_grpcomm_base.working_cpuset) {
orte_grpcomm_base.working_cpuset = hwloc_bitmap_alloc();
}
if (0 != hwloc_bitmap_list_sscanf(orte_grpcomm_base.working_cpuset, locale)) {
/* got a bad locale */
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
free(locale);
return ORTE_ERR_VALUE_OUT_OF_BOUNDS;
}
free(locale);
/* determine relative location on our node */
pmap->locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
opal_hwloc_my_cpuset,
orte_grpcomm_base.working_cpuset);
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcommpmi setting proc %s locale %04x",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name), pmap->locality));
}
if (0 != hwloc_bitmap_list_sscanf(orte_grpcomm_base.working_cpuset, pmi_attr_val)) {
/* got a bad locale */
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
return ORTE_ERR_VALUE_OUT_OF_BOUNDS;
}
/* determine relative location on our node */
pmap->locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
opal_hwloc_my_cpuset,
orte_grpcomm_base.working_cpuset);
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s grpcommpmi setting proc %s locale %04x",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name), pmap->locality));
}
}
#endif
@ -598,68 +675,56 @@ static void* pmi_decode(size_t *retlen) {
return ret;
}
/* useful util */
/*
 * Map a PMI return code to a short description for log output.
 *
 * pmi_err: a code returned by a PMI call.
 * Returns a pointer to a string literal (do not modify or free it);
 * codes that are not listed yield "Unknown error".
 */
static char* pmi_error(int pmi_err)
{
    char * err_msg;

    switch(pmi_err) {
    case PMI_FAIL: err_msg = "Operation failed"; break;
    case PMI_ERR_INIT: err_msg = "PMI is not initialized"; break;
    case PMI_ERR_NOMEM: err_msg = "Input buffer not large enough"; break;
    case PMI_ERR_INVALID_ARG: err_msg = "Invalid argument"; break;
    case PMI_ERR_INVALID_KEY: err_msg = "Invalid key argument"; break;
    case PMI_ERR_INVALID_KEY_LENGTH: err_msg = "Invalid key length argument"; break;
    case PMI_ERR_INVALID_VAL: err_msg = "Invalid value argument"; break;
    case PMI_ERR_INVALID_VAL_LENGTH: err_msg = "Invalid value length argument"; break;
    case PMI_ERR_INVALID_LENGTH: err_msg = "Invalid length argument"; break;
    case PMI_ERR_INVALID_NUM_ARGS: err_msg = "Invalid number of arguments"; break;
    case PMI_ERR_INVALID_ARGS: err_msg = "Invalid args argument"; break;
    case PMI_ERR_INVALID_NUM_PARSED: err_msg = "Invalid num_parsed length argument"; break;
    case PMI_ERR_INVALID_KEYVALP: err_msg = "Invalid keyvalp argument"; break;
    case PMI_ERR_INVALID_SIZE: err_msg = "Invalid size argument"; break;
#if defined(PMI_ERR_INVALID_KVS)
    /* pmi.h lists this as a valid return code, but mpich does not define it (slurm does) */
    case PMI_ERR_INVALID_KVS: err_msg = "Invalid kvs argument"; break;
#endif
    case PMI_SUCCESS: err_msg = "Success"; break;
    default: err_msg = "Unknown error"; break;
    }
    return err_msg;
}
static int setup_pmi(void)
{
int max_length, rc;
#if WANT_CRAY_PMI2_EXT
pmi_vallen_max = PMI2_MAX_VALLEN;
#else
rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max);
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_Get_value_length_max");
return ORTE_ERROR;
}
#endif
pmi_attr_val = malloc(pmi_vallen_max);
if (NULL == pmi_attr_val) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
#if WANT_CRAY_PMI2_EXT
/* TODO -- is this ok */
max_length = 1024;
#else
if (PMI_SUCCESS != (rc = PMI_KVS_Get_name_length_max(&max_length))) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
return ORTE_ERROR;
}
#endif
pmi_kvs_name = malloc(max_length);
if (NULL == pmi_kvs_name) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
#if WANT_CRAY_PMI2_EXT
rc = PMI2_Job_GetId(pmi_kvs_name, max_length);
#else
rc = PMI_KVS_Get_my_name(pmi_kvs_name,max_length);
#endif
if (PMI_SUCCESS != rc) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get_my_name");
return ORTE_ERROR;
}
#if WANT_CRAY_PMI2_EXT
pmi_keylen_max = PMI2_MAX_KEYLEN;
#else
if (PMI_SUCCESS != (rc = PMI_KVS_Get_key_length_max(&pmi_keylen_max))) {
ORTE_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max");
return ORTE_ERROR;
}
#endif
pmi_kvs_key = malloc(pmi_keylen_max);
return ORTE_SUCCESS;