![Josh Hursey](/assets/img/avatar_default.png)
assumptions in the FT restart code for the ORTE layer. This fixes those problems by having the RML completely shut down and restart the OOB framework (instead of just the module, as before). This makes it much easier to manage, and easier to maintain as the OOB changes in the future. The SDS now does communication as part of its startup procedure, so we need to make sure we restart the RML before the SDS so that it can communicate properly. OOB base [close|open] used a static bool to determine whether they had been called previously. I needed to expose this boolean so that I can close() then open() the OOB base in the restart procedure. The functionality has not changed; we just now have the ability to open/close the framework as many times as we need to, as long as we always call them in that order. (So calling open twice in a row is still not allowed, as before; it is only allowed if you open(), close(), then open() again.) Things seem to be working now. This commit was SVN r14515.
73 строки
2.2 KiB
C
73 строки
2.2 KiB
C
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007      Cisco Systems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
|
|
|
|
#include "orte_config.h"
|
|
|
|
#include <stdio.h>
|
|
|
|
#include "orte/orte_constants.h"
|
|
#include "opal/mca/mca.h"
|
|
#include "opal/mca/base/base.h"
|
|
#include "orte/mca/oob/oob.h"
|
|
#include "orte/mca/oob/base/base.h"
|
|
|
|
|
|
int mca_oob_base_close(void)
|
|
{
|
|
opal_list_item_t* item;
|
|
|
|
/* Sanity check. This may be able to be removed when the rml/oob
|
|
interface is re-worked (the current infrastructure may invoke
|
|
this function twice: once as a standalone, and once via the rml
|
|
oob component). */
|
|
if (!orte_oob_base_already_opened) {
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
|
|
/* Finalize all the oob modules and free their list items */
|
|
for (item = opal_list_remove_first(&mca_oob_base_modules);
|
|
item != NULL;
|
|
item = opal_list_remove_first(&mca_oob_base_modules)) {
|
|
mca_oob_base_info_t* base = (mca_oob_base_info_t *) item;
|
|
base->oob_module->oob_fini();
|
|
OBJ_RELEASE(base);
|
|
}
|
|
|
|
/* Close all remaining available modules (may be one if this is a
|
|
OMPI RTE program, or [possibly] multiple if this is ompi_info) */
|
|
|
|
mca_base_components_close(mca_oob_base_output, &mca_oob_base_components, NULL);
|
|
|
|
OBJ_DESTRUCT(&mca_oob_base_modules);
|
|
OBJ_DESTRUCT(&mca_oob_base_components);
|
|
OBJ_DESTRUCT(&mca_oob_base_exception_handlers);
|
|
|
|
if( NULL != mca_oob_base_include )
|
|
free(mca_oob_base_include);
|
|
if( NULL != mca_oob_base_exclude )
|
|
free(mca_oob_base_exclude);
|
|
|
|
/* All done */
|
|
orte_oob_base_already_opened = false;
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|