***** THIS INCLUDES A SMALL CHANGE IN THE MPI-RTE INTERFACE *****
Fix two problems that surfaced when using direct launch under SLURM:

1. Locally store our own data, because some BTLs want to retrieve it during add_procs rather than use what they have internally.

2. Clean up MPI_Abort so it correctly passes the error status all the way down to the actual exit. When the "abort_peers" API was implemented, the error status was left out, so we lost it at that point and *always* exited with a status of 1. This forces a change to the API to include the status.

cmr:v1.7.3:reviewer=jsquyres:subject=Fix MPI_Abort and modex_recv for direct launch

This commit was SVN r29405.
Parent: 7de2179866
Commit: 9902748108
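For context on fix #2: after this change the call chain MPI_Abort() -> ompi_mpi_abort() -> ompi_rte_abort_peers() -> PMI_Abort() carries the user's error code at every step instead of replacing it with a hardcoded 1. The following is a rough standalone model of that plumbing, not the real ORTE/OMPI code; pmi_abort() and the rte_abort_peers_*() functions are stand-ins for PMI_Abort() and the old/new ompi_rte_abort_peers() signatures.

/* Standalone model of the abort-status plumbing fixed in this commit.
 * The names below are stand-ins, not the real ORTE/OMPI symbols. */
#include <stdio.h>
#include <stdlib.h>

/* stand-in for PMI_Abort(status, msg): the launcher turns "status"
 * into the job's exit code */
static void pmi_abort(int status, const char *msg)
{
    fprintf(stderr, "aborting: %s\n", msg);
    exit(status);
}

/* old interface: no status argument, so the exit code was hardwired to 1 */
static int rte_abort_peers_old(void)
{
    pmi_abort(1, "");
    return 0;
}

/* new interface: the MPI_Abort errcode is carried all the way down */
static int rte_abort_peers_new(int status)
{
    pmi_abort(status, "");
    return 0;
}

int main(int argc, char **argv)
{
    int errcode = (argc > 1) ? atoi(argv[1]) : 2;

    (void)rte_abort_peers_old;       /* kept only for comparison */
    rte_abort_peers_new(errcode);    /* process exits with errcode, not 1 */
    return 0;
}

Run as "./a.out 7" the model exits with status 7, which mirrors the behavior this commit restores for MPI_Abort under direct launch.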
@@ -71,7 +71,7 @@ typedef orte_local_rank_t ompi_local_rank_t;
 
 /* Error handling objects and operations */
 OMPI_DECLSPEC void ompi_rte_abort(int error_code, char *fmt, ...);
-#define ompi_rte_abort_peers(a, b) orte_errmgr.abort_peers(a, b)
+#define ompi_rte_abort_peers(a, b, c) orte_errmgr.abort_peers(a, b, c)
 #define OMPI_RTE_ERRHANDLER_FIRST ORTE_ERRMGR_CALLBACK_FIRST
 #define OMPI_RTE_ERRHANDLER_LAST ORTE_ERRMGR_CALLBACK_LAST
 #define OMPI_RTE_ERRHANDLER_PREPEND ORTE_ERRMGR_CALLBACK_PREPEND
@@ -103,7 +103,7 @@ OMPI_DECLSPEC extern bool ompi_rte_proc_is_bound;
 
 /* Error handling objects and operations */
 OMPI_DECLSPEC void ompi_rte_abort(int error_code, char *fmt, ...);
-OMPI_DECLSPEC int ompi_rte_abort_peers(ompi_process_name_t *procs, size_t nprocs);
+OMPI_DECLSPEC int ompi_rte_abort_peers(ompi_process_name_t *procs, size_t nprocs, int status);
 OMPI_DECLSPEC int ompi_rte_error_log(const char *file, int line,
                                      const char *func, int ret);
 #define OMPI_ERROR_LOG(ret) ompi_rte_error_log(__FILE__, __LINE__, __func__, ret)
@@ -45,9 +45,9 @@ ompi_rte_abort(int error_code, char *fmt, ...)
 
 int
-ompi_rte_abort_peers(ompi_process_name_t *procs, size_t nprocs)
+ompi_rte_abort_peers(ompi_process_name_t *procs, size_t nprocs, int status)
 {
-    PMI_Abort(1, "");
+    PMI_Abort(status, "");
     return OMPI_SUCCESS;
 }
@@ -194,8 +194,8 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
     /*
      * Abort peers in this communicator group. Does not include self.
      */
-    if( OMPI_SUCCESS != (ret = ompi_rte_abort_peers(abort_procs, nabort_procs)) ) {
-        ompi_rte_abort(ret, "Open MPI failed to abort all of the procs requested (%d).", ret);
+    if( OMPI_SUCCESS != (ret = ompi_rte_abort_peers(abort_procs, nabort_procs, errcode)) ) {
+        ompi_rte_abort(errcode, "Open MPI failed to abort all of the procs requested (%d).", ret);
     }
 }
@@ -345,12 +345,17 @@ static int fetch(const opal_identifier_t *uid,
     /* lookup the proc data object for this proc */
     if (NULL == (proc_data = lookup_opal_proc(&hash_data, id))) {
         /* maybe they can find it elsewhere */
+        OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
+                             "db_hash:fetch data for proc %" PRIu64 " not found", id));
        return OPAL_ERR_TAKE_NEXT_OPTION;
     }
 
     /* find the value */
     if (NULL == (kv = lookup_keyval(proc_data, key))) {
         /* let them look globally for it */
+        OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
+                             "db_hash:fetch key %s for proc %" PRIu64 " not found",
+                             (NULL == key) ? "NULL" : key, id));
        return OPAL_ERR_TAKE_NEXT_OPTION;
     }
@@ -384,6 +384,7 @@ static int store(const opal_identifier_t *uid,
                  const char *key, const void *data, opal_data_type_t type)
 {
     opal_identifier_t proc;
+    int rc;
 
     /* to protect alignment, copy the data across */
     memcpy(&proc, uid, sizeof(opal_identifier_t));
@@ -398,7 +399,16 @@ static int store(const opal_identifier_t *uid,
                          "db:pmi:store: storing key %s[%s] for proc %" PRIu64 "",
                          key, opal_dss.lookup_data_type(type), proc));
 
-    return pmi_store_encoded (uid, key, data, type);
+    if (OPAL_SUCCESS != (rc = pmi_store_encoded (uid, key, data, type))) {
+        OPAL_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* we want our internal data to be stored internally
+     * as well since some of the upper layer components
+     * want to retrieve it
+     */
+    return OPAL_ERR_TAKE_NEXT_OPTION;
 }
 
 static int store_pointer(const opal_identifier_t *uid,
@@ -419,8 +429,14 @@ static int store_pointer(const opal_identifier_t *uid,
     /* just push this to PMI */
     if (OPAL_SUCCESS != (rc = store(uid, kv->scope, kv->key, (void*)&kv->data, kv->type))) {
         OPAL_ERROR_LOG(rc);
+        return rc;
     }
-    return rc;
+
+    /* we want our internal data to be stored internally
+     * as well since some of the upper layer components
+     * want to retrieve it
+     */
+    return OPAL_ERR_TAKE_NEXT_OPTION;
 }
 
 static void commit(const opal_identifier_t *proc)
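The store()/store_pointer() changes above rely on the db framework's fall-through convention as used here: a component that returns OPAL_ERR_TAKE_NEXT_OPTION has handled the request but asks the base to offer it to the next component as well, so the value lands both in PMI and in the local hash, where BTLs can fetch it during add_procs. Below is a toy model of that control flow only; the component names, key, and value are made up for illustration and none of this is the real OPAL db API.

/* Toy model of the fall-through: a store handler that succeeds but
 * returns TAKE_NEXT_OPTION lets the caller offer the same key/value
 * to the next handler in the chain as well. */
#include <stddef.h>
#include <stdio.h>

#define SUCCESS           0
#define TAKE_NEXT_OPTION  1   /* "I handled it, but keep going" */

static int store_pmi(const char *key, const char *val)
{
    printf("pmi: published %s=%s\n", key, val);
    return TAKE_NEXT_OPTION;      /* also keep a local copy */
}

static int store_hash(const char *key, const char *val)
{
    printf("hash: cached %s=%s locally\n", key, val);
    return SUCCESS;               /* done, stop the chain */
}

typedef int (*store_fn)(const char *, const char *);

int main(void)
{
    store_fn chain[] = { store_pmi, store_hash };
    const char *key = "example.endpoint", *val = "10.0.0.1:1024";

    for (size_t i = 0; i < sizeof(chain) / sizeof(chain[0]); i++) {
        if (TAKE_NEXT_OPTION != chain[i](key, val)) {
            break;                /* a handler finished the job */
        }
    }
    return 0;
}

The fetch() change in db_hash above mirrors this: a key that is missing locally returns OPAL_ERR_TAKE_NEXT_OPTION so another component gets a chance to resolve it.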
@@ -240,7 +240,9 @@ void orte_errmgr_base_register_migration_warning(struct timeval *tv)
     return;
 }
 
-int orte_errmgr_base_abort_peers(orte_process_name_t *procs, orte_std_cntr_t num_procs)
+int orte_errmgr_base_abort_peers(orte_process_name_t *procs,
+                                 orte_std_cntr_t num_procs,
+                                 int error_code)
 {
     return ORTE_ERR_NOT_IMPLEMENTED;
 }
@@ -72,7 +72,8 @@ ORTE_DECLSPEC void orte_errmgr_base_log(int error_code, char *filename, int line
 ORTE_DECLSPEC void orte_errmgr_base_abort(int error_code, char *fmt, ...)
     __opal_attribute_format__(__printf__, 2, 3);
 ORTE_DECLSPEC int orte_errmgr_base_abort_peers(orte_process_name_t *procs,
-                                               orte_std_cntr_t num_procs);
+                                               orte_std_cntr_t num_procs,
+                                               int error_code);
 
 ORTE_DECLSPEC void orte_errmgr_base_register_migration_warning(struct timeval *tv);
@@ -48,7 +48,8 @@ static int init(void);
 static int finalize(void);
 
 static int abort_peers(orte_process_name_t *procs,
-                       orte_std_cntr_t num_procs);
+                       orte_std_cntr_t num_procs,
+                       int error_code);
 
 /******************
  * HNP module
@@ -131,14 +132,16 @@ static void proc_errors(int fd, short args, void *cbdata)
     OBJ_RELEASE(caddy);
 }
 
-static int abort_peers(orte_process_name_t *procs, orte_std_cntr_t num_procs)
+static int abort_peers(orte_process_name_t *procs,
+                       orte_std_cntr_t num_procs,
+                       int error_code)
 {
     /* just abort */
     if (0 < opal_output_get_verbosity(orte_errmgr_base_framework.framework_output)) {
-        orte_errmgr_base_abort(ORTE_ERROR_DEFAULT_EXIT_CODE, "%s called abort_peers",
+        orte_errmgr_base_abort(error_code, "%s called abort_peers",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
     } else {
-        orte_errmgr_base_abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
+        orte_errmgr_base_abort(error_code, NULL);
     }
     return ORTE_SUCCESS;
 }
@@ -13,6 +13,7 @@
  * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
  *                         All rights reserved.
+ * Copyright (c) 2013      Intel, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -177,7 +178,8 @@ __opal_attribute_format_funcptr__(__printf__, 2, 3);
  * communicator group before aborting itself.
  */
 typedef int (*orte_errmgr_base_module_abort_peers_fn_t)(orte_process_name_t *procs,
-                                                        orte_std_cntr_t num_procs);
+                                                        orte_std_cntr_t num_procs,
+                                                        int error_code);
 
 /**
  * Predicted process/node failure notification
@@ -290,30 +290,6 @@ int orte_ess_base_app_setup(bool db_restrict_local)
         goto error;
     }
 
-    /* if we are an ORTE app - and not an MPI app - then
-     * we need to exchange our connection info here.
-     * MPI_Init has its own modex, so we don't need to do
-     * two of them. However, if we don't do a modex at all,
-     * then processes have no way to communicate
-     *
-     * NOTE: only do this when the process originally launches.
-     * Cannot do this on a restart as the rest of the processes
-     * in the job won't be executing this step, so we would hang
-     */
-    if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) {
-        orte_grpcomm_collective_t coll;
-        OBJ_CONSTRUCT(&coll, orte_grpcomm_collective_t);
-        coll.id = orte_process_info.peer_modex;
-        if (ORTE_SUCCESS != (ret = orte_grpcomm.modex(&coll))) {
-            ORTE_ERROR_LOG(ret);
-            error = "orte modex";
-            goto error;
-        }
-        coll.active = true;
-        ORTE_WAIT_FOR_COMPLETION(coll.active);
-        OBJ_DESTRUCT(&coll);
-    }
-
     return ORTE_SUCCESS;
 
 error:
orte/mca/ess/env/ess_env_module.c
@@ -162,6 +162,30 @@ static int rte_init(void)
         goto error;
     }
 
+    /* if we are an ORTE app - and not an MPI app - then
+     * we need to exchange our connection info here.
+     * MPI_Init has its own modex, so we don't need to do
+     * two of them. However, if we don't do a modex at all,
+     * then processes have no way to communicate
+     *
+     * NOTE: only do this when the process originally launches.
+     * Cannot do this on a restart as the rest of the processes
+     * in the job won't be executing this step, so we would hang
+     */
+    if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) {
+        orte_grpcomm_collective_t coll;
+        OBJ_CONSTRUCT(&coll, orte_grpcomm_collective_t);
+        coll.id = orte_process_info.peer_modex;
+        coll.active = true;
+        if (ORTE_SUCCESS != (ret = orte_grpcomm.modex(&coll))) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte modex";
+            goto error;
+        }
+        ORTE_WAIT_FOR_COMPLETION(coll.active);
+        OBJ_DESTRUCT(&coll);
+    }
+
     return ORTE_SUCCESS;
 
 error:
@@ -53,6 +53,7 @@
 
 #include "opal/mca/db/db.h"
 #include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/grpcomm/grpcomm.h"
 #include "orte/mca/rml/rml.h"
 #include "orte/util/proc_info.h"
 #include "orte/util/show_help.h"
@@ -387,6 +388,30 @@ static int rte_init(void)
         goto error;
     }
 
+    /* if we are an ORTE app - and not an MPI app - then
+     * we need to exchange our connection info here.
+     * MPI_Init has its own modex, so we don't need to do
+     * two of them. However, if we don't do a modex at all,
+     * then processes have no way to communicate
+     *
+     * NOTE: only do this when the process originally launches.
+     * Cannot do this on a restart as the rest of the processes
+     * in the job won't be executing this step, so we would hang
+     */
+    if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) {
+        orte_grpcomm_collective_t coll;
+        OBJ_CONSTRUCT(&coll, orte_grpcomm_collective_t);
+        coll.id = orte_process_info.peer_modex;
+        coll.active = true;
+        if (ORTE_SUCCESS != (ret = orte_grpcomm.modex(&coll))) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte modex";
+            goto error;
+        }
+        ORTE_WAIT_FOR_COMPLETION(coll.active);
+        OBJ_DESTRUCT(&coll);
+    }
+
     /* flag that we completed init */
     app_init_complete = true;
 
@@ -446,7 +471,26 @@ static int rte_finalize(void)
     return ORTE_SUCCESS;
 }
 
-static void rte_abort(int error_code, bool report)
+static void rte_abort(int status, bool report)
 {
-    orte_ess_base_app_abort(error_code, report);
+    OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
+                         "%s ess:pmi:abort: abort with status %d",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         status));
+
+    /* PMI doesn't like NULL messages, but our interface
+     * doesn't provide one - so rig one up here
+     */
+#if WANT_PMI2_SUPPORT
+    PMI2_Abort(status, "N/A");
+#else
+    PMI_Abort(status, "N/A");
+#endif
+
+    /* - Clean out the global structures
+     * (not really necessary, but good practice) */
+    orte_proc_info_finalize();
+
+    /* Now Exit */
+    exit(status);
 }
@@ -80,8 +80,8 @@ int orte_grpcomm_pmi_component_query(mca_base_module_t **module, int *priority)
 {
     /* only use PMI when direct launched */
     if (NULL == orte_process_info.my_hnp_uri &&
-        ORTE_PROC_IS_MPI &&
-        mca_common_pmi_init ()) {
+        ORTE_PROC_IS_APP &&
+        mca_common_pmi_init ()) {
         /* if PMI is available, make it available for use by MPI procs */
         *priority = my_priority;
         *module = (mca_base_module_t *)&orte_grpcomm_pmi_module;
@@ -6,11 +6,20 @@
  */
 
 #include <stdio.h>
+#include <stdlib.h>
 
 #include "mpi.h"
 
 int main(int argc, char* argv[])
 {
     int rank, size;
+    int errcode;
+
+    if (1 < argc) {
+        errcode = strtol(argv[1], NULL, 10);
+    } else {
+        errcode = 2;
+    }
 
     MPI_Init(&argc, &argv);
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -18,8 +27,12 @@ int main(int argc, char* argv[])
 
     printf("Hello, World, I am %d of %d\n", rank, size);
 
-    if (1 == rank) MPI_Abort(MPI_COMM_WORLD, 2);
+    if (1 == rank) {
+        MPI_Abort(MPI_COMM_WORLD, errcode);
+    } else {
+        errcode = 0;
+    }
 
     MPI_Finalize();
-    return 0;
+    return errcode;
 }
@@ -38,7 +38,8 @@ int main(int argc, char* argv[])
 
     printf("orte_abort: Name %s Host: %s Pid %ld\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
            hostname, (long)pid);
+    fflush(stdout);
 
     i = 0;
     while (1) {
         i++;
@@ -51,20 +51,22 @@ int main(int argc, char **argv, char **envp)
         goto done;
     }
 
-done:
-    if (PMI_TRUE == pmi_initialized) {
-        i = 0;
-        while (1) {
-            i++;
-            pi = i / 3.14159256;
-            if (i > 10000) i = 0;
-            if ((pmi_rank == 3 ||
-                 (pmi_process_group_size <= 3 && pmi_rank == 0))
-                && i == 9995) {
-                PMI_Abort(rc, "RANK0 CALLED ABORT");
-            }
-        }
-    }
+    i = 0;
+    while (1) {
+        i++;
+        pi = i / 3.14159256;
+        if (i > 10000) i = 0;
+        if ((pmi_rank == 3 ||
+             (pmi_process_group_size <= 3 && pmi_rank == 0))
+            && i == 9995) {
+            asprintf(&err, "RANK%d CALLED ABORT", pmi_rank);
+            fprintf(stderr, "%s\n", err);
+            fflush(stderr);
+            PMI_Abort(rc, err);
+        }
+    }
+
+done:
     if (NULL != err) {
         fprintf(stderr, "=== ERROR [rank:%d] %s\n", pmi_rank, err);
         rc = EXIT_FAILURE;