Remove the orte notifier framework, per discussion at the devel meeting and follow-up with Jeff (who took the action item)
This commit was SVN r26637.
Этот коммит содержится в:
родитель
03f00c42b8
Коммит
e6f3586415
@ -74,7 +74,6 @@ const char *ibv_get_sysfs_path(void);
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
@ -3567,13 +3566,6 @@ error:
|
||||
cq_name[cq], btl_openib_component_status_to_string(wc->status),
|
||||
wc->status, wc->wr_id,
|
||||
wc->opcode, wc->vendor_err, qp));
|
||||
orte_notifier.log_peer(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE,
|
||||
remote_proc ? &remote_proc->proc_name : NULL,
|
||||
"\n\tIB polling %s with status %s "
|
||||
"status number %d for wr_id %llu opcode %d vendor error %d qp_idx %d",
|
||||
cq_name[cq], btl_openib_component_status_to_string(wc->status),
|
||||
wc->status, wc->wr_id,
|
||||
wc->opcode, wc->vendor_err, qp);
|
||||
}
|
||||
|
||||
if (IBV_WC_RNR_RETRY_EXC_ERR == wc->status ||
|
||||
@ -3592,23 +3584,11 @@ error:
|
||||
"srq rnr retry exceeded", true,
|
||||
orte_process_info.nodename, device_name,
|
||||
peer_hostname);
|
||||
orte_notifier.show_help(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE,
|
||||
"help-mpi-btl-openib.txt",
|
||||
BTL_OPENIB_QP_TYPE_PP(qp) ?
|
||||
"pp rnr retry exceeded" :
|
||||
"srq rnr retry exceeded",
|
||||
orte_process_info.nodename, device_name,
|
||||
peer_hostname);
|
||||
} else if (IBV_WC_RETRY_EXC_ERR == wc->status) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"pp retry exceeded", true,
|
||||
orte_process_info.nodename,
|
||||
device_name, peer_hostname);
|
||||
orte_notifier.show_help(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE,
|
||||
"help-mpi-btl-openib.txt",
|
||||
"pp retry exceeded",
|
||||
orte_process_info.nodename,
|
||||
device_name, peer_hostname);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -48,7 +48,6 @@
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
@ -2963,13 +2962,6 @@ error:
|
||||
cq_name[cq], btl_wv_component_status_to_string(wc->Status),
|
||||
wc->Status, wc->WrId,
|
||||
wc->Opcode, wc->VendorCode, qp));
|
||||
orte_notifier.log_peer(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE,
|
||||
remote_proc ? &remote_proc->proc_name : NULL,
|
||||
"\n\tIB polling %s with status %s "
|
||||
"status number %d for wr_id %llu opcode %d vendor error %d qp_idx %d",
|
||||
cq_name[cq], btl_wv_component_status_to_string(wc->Status),
|
||||
wc->Status, wc->WrId,
|
||||
wc->Opcode, wc->VendorCode, qp);
|
||||
}
|
||||
|
||||
if (WvWcRnrRetryError == wc->Status ||
|
||||
@ -2988,23 +2980,11 @@ error:
|
||||
"srq rnr retry exceeded", true,
|
||||
orte_process_info.nodename, device_name,
|
||||
peer_hostname);
|
||||
orte_notifier.show_help(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE,
|
||||
"help-mpi-btl-wv.txt",
|
||||
BTL_WV_QP_TYPE_PP(qp) ?
|
||||
"pp rnr retry exceeded" :
|
||||
"srq rnr retry exceeded",
|
||||
orte_process_info.nodename, device_name,
|
||||
peer_hostname);
|
||||
} else if (-2 == wc->Status) {
|
||||
orte_show_help("help-mpi-btl-wv.txt",
|
||||
"pp retry exceeded", true,
|
||||
orte_process_info.nodename,
|
||||
device_name, peer_hostname);
|
||||
orte_notifier.show_help(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE,
|
||||
"help-mpi-btl-wv.txt",
|
||||
"pp retry exceeded",
|
||||
orte_process_info.nodename,
|
||||
device_name, peer_hostname);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -45,7 +45,6 @@
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
#include "ompi/runtime/ompi_cr.h"
|
||||
|
||||
@ -1413,15 +1412,7 @@ void mca_pml_bfo_map_out_btl(struct mca_btl_base_module_t* btl,
|
||||
if (true == remove) {
|
||||
mca_bml.bml_del_proc_btl(errproc, btl);
|
||||
|
||||
orte_notifier.log(ORTE_NOTIFIER_ERROR, ORTE_ERR_COMM_FAILURE,
|
||||
"BTL %s error: rank=%d mapping out %s "
|
||||
"to rank=%d on node=%s",
|
||||
btl->btl_component->btl_version.mca_component_name,
|
||||
ORTE_PROC_MY_NAME->vpid,
|
||||
btlname, errproc->proc_name.vpid,
|
||||
errproc->proc_hostname);
|
||||
|
||||
opal_output_verbose(10, mca_pml_bfo_output,
|
||||
opal_output_verbose(10, mca_pml_bfo_output,
|
||||
"BTL %s error: rank=%d mapping out %s "
|
||||
"to rank=%d on node=%s \n",
|
||||
btl->btl_component->btl_version.mca_component_name,
|
||||
|
@ -37,7 +37,6 @@
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
@ -172,10 +171,6 @@ void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl,
|
||||
if (csum_received != csum) {
|
||||
opal_output(0, "%s:%s:%d: Invalid \'match header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
||||
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||
"Checksum header violation: job %s file %s line %d",
|
||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||
__FILE__, __LINE__);
|
||||
dump_csum_error_data(segments, 1);
|
||||
orte_errmgr.abort(-1,NULL);
|
||||
}
|
||||
@ -303,10 +298,6 @@ void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl,
|
||||
if (csum_data != hdr->hdr_csum) {
|
||||
opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_csum, csum_data);
|
||||
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||
"Checksum data violation: job %s file %s line %d",
|
||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||
__FILE__, __LINE__);
|
||||
dump_csum_error_data(segments, num_segments);
|
||||
orte_errmgr.abort(-1,NULL);
|
||||
}
|
||||
@ -349,10 +340,6 @@ void mca_pml_csum_recv_frag_callback_rndv(mca_btl_base_module_t* btl,
|
||||
if (csum_received != csum) {
|
||||
opal_output(0, "%s:%s:%d: Invalid \'rndv header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
||||
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||
"Checksum header violation: job %s file %s line %d",
|
||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||
__FILE__, __LINE__);
|
||||
dump_csum_error_data(segments, 1);
|
||||
orte_errmgr.abort(-1,NULL);
|
||||
}
|
||||
@ -409,10 +396,6 @@ void mca_pml_csum_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
||||
if (csum_received != csum) {
|
||||
opal_output(0, "%s:%s:%d: Invalid \'ACK header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
||||
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||
"Checksum header violation: job %s file %s line %d",
|
||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||
__FILE__, __LINE__);
|
||||
dump_csum_error_data(segments, 1);
|
||||
orte_errmgr.abort(-1,NULL);
|
||||
}
|
||||
@ -473,10 +456,6 @@ void mca_pml_csum_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
||||
if(csum_received != csum) {
|
||||
opal_output(0, "%s:%s:%d: Invalid \'frag header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
||||
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||
"Checksum header violation: job %s file %s line %d",
|
||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||
__FILE__, __LINE__);
|
||||
dump_csum_error_data(segments, 1);
|
||||
orte_errmgr.abort(-1,NULL);
|
||||
}
|
||||
@ -515,10 +494,6 @@ void mca_pml_csum_recv_frag_callback_put(mca_btl_base_module_t* btl,
|
||||
if(csum_received != csum) {
|
||||
opal_output(0, "%s:%s:%d: Invalid \'PUT header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
||||
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||
"Checksum header violation: job %s file %s line %d",
|
||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||
__FILE__, __LINE__);
|
||||
dump_csum_error_data(segments, 1);
|
||||
orte_errmgr.abort(-1,NULL);
|
||||
}
|
||||
@ -557,10 +532,6 @@ void mca_pml_csum_recv_frag_callback_fin(mca_btl_base_module_t* btl,
|
||||
if(csum_received != csum) {
|
||||
opal_output(0, "%s:%s:%d: Invalid \'FIN header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
||||
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||
"Checksum header violation: job %s file %s line %d",
|
||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||
__FILE__, __LINE__);
|
||||
dump_csum_error_data(segments, 1);
|
||||
orte_errmgr.abort(-1,NULL);
|
||||
}
|
||||
|
@ -32,7 +32,6 @@
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/bml/bml.h"
|
||||
@ -498,10 +497,6 @@ void mca_pml_csum_recv_request_progress_frag( mca_pml_csum_recv_request_t* recvr
|
||||
if(csum != hdr->hdr_frag.hdr_csum) {
|
||||
opal_output(0, "%s:%s:%d: Invalid \'frag data\' - received csum:0x%x != computed csum:0x%x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_frag.hdr_csum, csum);
|
||||
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||
"Checksum data violation: job %s file %s line %d",
|
||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||
__FILE__, __LINE__);
|
||||
dump_csum_error_data(segments, num_segments);
|
||||
orte_errmgr.abort(-1,NULL);
|
||||
}
|
||||
@ -645,10 +640,6 @@ void mca_pml_csum_recv_request_progress_rndv( mca_pml_csum_recv_request_t* recvr
|
||||
if (csum != hdr->hdr_match.hdr_csum) {
|
||||
opal_output(0, "%s:%s:%d: Invalid \'rndv data\' - received csum:0x%x != computed csum:0x%x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_match.hdr_csum, csum);
|
||||
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||
"Checksum data violation: job %s file %s line %d",
|
||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||
__FILE__, __LINE__);
|
||||
dump_csum_error_data(segments, num_segments);
|
||||
orte_errmgr.abort(-1,NULL);
|
||||
}
|
||||
@ -706,10 +697,6 @@ void mca_pml_csum_recv_request_progress_match( mca_pml_csum_recv_request_t* recv
|
||||
if (csum != hdr->hdr_match.hdr_csum) {
|
||||
opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_match.hdr_csum, csum);
|
||||
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||
"Checksum data violation: job %s file %s line %d",
|
||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||
__FILE__, __LINE__);
|
||||
dump_csum_error_data(segments, num_segments);
|
||||
orte_errmgr.abort(-1,NULL);
|
||||
}
|
||||
|
@ -49,7 +49,6 @@
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
@ -122,17 +121,6 @@ int ompi_mpi_finalize(void)
|
||||
*/
|
||||
(void)mca_pml_base_bsend_detach(NULL, NULL);
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
/* If desired, send a notify message */
|
||||
if (ompi_notify_init_finalize) {
|
||||
orte_notifier.log(ORTE_NOTIFIER_NOTICE,
|
||||
ORTE_SUCCESS,
|
||||
"MPI_FINALIZE:Starting on host %s, pid %d",
|
||||
orte_process_info.nodename,
|
||||
orte_process_info.pid);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Per MPI-2:4.8, we have to free MPI_COMM_SELF before doing
|
||||
anything else in MPI_FINALIZE (to include setting up such that
|
||||
MPI_FINALIZED will return true). */
|
||||
@ -424,16 +412,6 @@ int ompi_mpi_finalize(void)
|
||||
free(ompi_mpi_show_mca_params_file);
|
||||
}
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
/* If desired, send a notify message */
|
||||
if (ompi_notify_init_finalize) {
|
||||
orte_notifier.log(ORTE_NOTIFIER_NOTICE,
|
||||
ORTE_SUCCESS,
|
||||
"MPI_FINALIZE:Finishing on host %s, pid %d",
|
||||
orte_process_info.nodename,
|
||||
orte_process_info.pid);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Leave the RTE */
|
||||
|
||||
|
@ -57,7 +57,6 @@
|
||||
#include "orte/mca/odls/base/base.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mpi/fortran/base/constants.h"
|
||||
@ -467,17 +466,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
goto error;
|
||||
}
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
/* If desired, send a notify message */
|
||||
if (ompi_notify_init_finalize) {
|
||||
orte_notifier.log(ORTE_NOTIFIER_NOTICE,
|
||||
ORTE_SUCCESS,
|
||||
"MPI_INIT:Starting on host %s, pid %d",
|
||||
orte_process_info.nodename,
|
||||
orte_process_info.pid);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* initialize datatypes. This step should be done early as it will
|
||||
* create the local convertor and local arch used in the proc
|
||||
* init.
|
||||
@ -966,16 +954,5 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
}
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
/* If desired, send a notifier message that we've finished MPI_INIT */
|
||||
if (ompi_notify_init_finalize) {
|
||||
orte_notifier.log(ORTE_NOTIFIER_NOTICE,
|
||||
ORTE_SUCCESS,
|
||||
"MPI_INIT:Finishing on host %s, pid %d",
|
||||
orte_process_info.nodename,
|
||||
orte_process_info.pid);
|
||||
}
|
||||
#endif
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
@ -60,7 +60,6 @@ int ompi_mpi_leave_pinned = -1;
|
||||
bool ompi_mpi_leave_pinned_pipeline = false;
|
||||
bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
|
||||
bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
|
||||
bool ompi_notify_init_finalize = true;
|
||||
|
||||
static bool show_default_mca_params = false;
|
||||
static bool show_file_mca_params = false;
|
||||
@ -287,14 +286,6 @@ int ompi_mpi_register_params(void)
|
||||
}
|
||||
}
|
||||
|
||||
/* Do we want notifier messages upon MPI_INIT and MPI_FINALIZE? */
|
||||
|
||||
mca_base_param_reg_int_name("mpi", "notify_init_finalize",
|
||||
"If nonzero, send two notifications during MPI_INIT: one near when MPI_INIT starts, and another right before MPI_INIT finishes, and send 2 notifications during MPI_FINALIZE: one right when MPI_FINALIZE starts, and another near when MPI_FINALIZE finishes.",
|
||||
false, false,
|
||||
(int) ompi_notify_init_finalize, &value);
|
||||
ompi_notify_init_finalize = OPAL_INT_TO_BOOL(value);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -138,12 +138,6 @@ OMPI_DECLSPEC extern bool ompi_have_sparse_group_storage;
|
||||
*/
|
||||
OMPI_DECLSPEC extern bool ompi_use_sparse_group_storage;
|
||||
|
||||
/**
|
||||
* Whether we should send notifications during MPI_INIT and
|
||||
* MPI_FINALIZE or not.
|
||||
*/
|
||||
OMPI_DECLSPEC extern bool ompi_notify_init_finalize;
|
||||
|
||||
/**
|
||||
* Register MCA parameters used by the MPI layer.
|
||||
*
|
||||
|
@ -106,8 +106,6 @@
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "orte/mca/iof/iof.h"
|
||||
#include "orte/mca/iof/base/base.h"
|
||||
#include "orte/mca/oob/oob.h"
|
||||
@ -411,14 +409,6 @@ void ompi_info_open_components(void)
|
||||
opal_pointer_array_add(&component_map, map);
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
if (ORTE_SUCCESS != orte_notifier_base_open()) {
|
||||
goto error;
|
||||
}
|
||||
map = OBJ_NEW(ompi_info_component_map_t);
|
||||
map->type = strdup("notifier");
|
||||
map->components = &orte_notifier_base_components_available;
|
||||
opal_pointer_array_add(&component_map, map);
|
||||
|
||||
if (ORTE_SUCCESS != mca_oob_base_open()) {
|
||||
goto error;
|
||||
}
|
||||
@ -767,7 +757,6 @@ void ompi_info_close_components()
|
||||
(void) orte_snapc_base_close();
|
||||
(void) orte_sstore_base_close();
|
||||
#endif
|
||||
(void) orte_notifier_base_close();
|
||||
(void) orte_filem_base_close();
|
||||
(void) orte_iof_base_close();
|
||||
(void) orte_plm_base_close();
|
||||
|
@ -270,7 +270,6 @@ int main(int argc, char *argv[])
|
||||
opal_pointer_array_add(&mca_types, "ess");
|
||||
opal_pointer_array_add(&mca_types, "grpcomm");
|
||||
opal_pointer_array_add(&mca_types, "db");
|
||||
opal_pointer_array_add(&mca_types, "notifier");
|
||||
|
||||
/* Execute the desired action(s) */
|
||||
|
||||
|
@ -532,7 +532,6 @@ void ompi_info_do_config(bool want_all)
|
||||
char *memprofile;
|
||||
char *memdebug;
|
||||
char *debug;
|
||||
char *log_event;
|
||||
char *mpi_interface_warning;
|
||||
char *cprofiling;
|
||||
char *cxxprofiling;
|
||||
@ -649,7 +648,6 @@ void ompi_info_do_config(bool want_all)
|
||||
memprofile = OPAL_ENABLE_MEM_PROFILE ? "yes" : "no";
|
||||
memdebug = OPAL_ENABLE_MEM_DEBUG ? "yes" : "no";
|
||||
debug = OPAL_ENABLE_DEBUG ? "yes" : "no";
|
||||
log_event = ORTE_WANT_NOTIFIER_LOG_EVENT ? "yes" : "no";
|
||||
mpi_interface_warning = OMPI_WANT_MPI_INTERFACE_WARNING ? "yes" : "no";
|
||||
cprofiling = OMPI_ENABLE_MPI_PROFILING ? "yes" : "no";
|
||||
cxxprofiling = (OMPI_BUILD_CXX_BINDINGS && OMPI_ENABLE_MPI_PROFILING) ? "yes" : "no";
|
||||
@ -982,7 +980,6 @@ void ompi_info_do_config(bool want_all)
|
||||
ompi_info_out("MPI parameter check", "option:mpi-param-check", paramcheck);
|
||||
ompi_info_out("Memory profiling support", "option:mem-profile", memprofile);
|
||||
ompi_info_out("Memory debugging support", "option:mem-debug", memdebug);
|
||||
ompi_info_out("Unusual events notif.", "option:log-event", log_event);
|
||||
ompi_info_out("libltdl support", "option:dlopen", want_libltdl);
|
||||
ompi_info_out("Heterogeneous support", "options:heterogeneous", heterogeneous);
|
||||
ompi_info_out("mpirun default --prefix", "mpirun:prefix_by_default",
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -81,7 +81,6 @@
|
||||
#include "orte/mca/snapc/base/base.h"
|
||||
#include "orte/mca/sstore/sstore.h"
|
||||
#include "orte/mca/sstore/base/base.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/errmgr/base/base.h"
|
||||
@ -300,14 +299,12 @@ void orte_errmgr_base_migrate_state_notify(int state)
|
||||
switch(state) {
|
||||
case ORTE_ERRMGR_MIGRATE_STATE_ERROR:
|
||||
case ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS:
|
||||
orte_notifier.log(ORTE_NOTIFIER_ERROR, state,
|
||||
"%d: Migration failed for process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
opal_output(0, "%d: Migration failed for process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
break;
|
||||
case ORTE_ERRMGR_MIGRATE_STATE_FINISH:
|
||||
orte_notifier.log(ORTE_NOTIFIER_INFO, state,
|
||||
"%d: Migration successful for process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
opal_output(0, "%d: Migration successful for process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
break;
|
||||
|
||||
case ORTE_ERRMGR_MIGRATE_STATE_NONE:
|
||||
@ -331,26 +328,23 @@ void orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_na
|
||||
case ORTE_PROC_STATE_TERMINATED:
|
||||
case ORTE_PROC_STATE_KILLED_BY_CMD:
|
||||
case ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED:
|
||||
orte_notifier.log(ORTE_NOTIFIER_ERROR, state, "%d: Process %s is dead.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
|
||||
opal_output(0, "%d: Process %s is dead.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
|
||||
break;
|
||||
|
||||
case ORTE_PROC_STATE_HEARTBEAT_FAILED:
|
||||
orte_notifier.log(ORTE_NOTIFIER_ERROR, state,
|
||||
"%d: Process %s is unreachable.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
|
||||
opal_output(0, "%d: Process %s is unreachable.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
|
||||
|
||||
case ORTE_PROC_STATE_COMM_FAILED:
|
||||
orte_notifier.log(ORTE_NOTIFIER_WARN, state,
|
||||
"%d: Failed to communicate with process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
|
||||
opal_output(0, "%d: Failed to communicate with process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
|
||||
break;
|
||||
|
||||
case ORTE_PROC_STATE_CALLED_ABORT:
|
||||
case ORTE_PROC_STATE_FAILED_TO_START:
|
||||
orte_notifier.log(ORTE_NOTIFIER_ERROR, state,
|
||||
"%d: Process %s has called abort.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
|
||||
opal_output(0, "%d: Process %s has called abort.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid));
|
||||
break;
|
||||
case ORTE_PROC_STATE_MIGRATING:
|
||||
default:
|
||||
|
@ -41,7 +41,6 @@
|
||||
#include "orte/mca/rmaps/rmaps_types.h"
|
||||
#include "orte/mca/sensor/sensor.h"
|
||||
#include "orte/mca/routed/routed.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
|
@ -56,7 +56,6 @@
|
||||
#include "orte/util/session_dir.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
#include "orte/runtime/orte_cr.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
@ -251,18 +250,6 @@ int orte_ess_base_app_setup(void)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup the notifier system */
|
||||
if (ORTE_SUCCESS != (ret = orte_notifier_base_open())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_notifer_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_notifier_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_notifer_select";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* if we are an ORTE app - and not an MPI app - then
|
||||
* we need to barrier here. MPI_Init has its own barrier,
|
||||
* so we don't need to do two of them. However, if we
|
||||
@ -299,9 +286,7 @@ error:
|
||||
}
|
||||
|
||||
int orte_ess_base_app_finalize(void)
|
||||
{
|
||||
orte_notifier_base_close();
|
||||
|
||||
{
|
||||
orte_cr_finalize();
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
|
@ -62,7 +62,6 @@
|
||||
#include "orte/util/regex.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/errmgr/base/base.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "orte/mca/sensor/base/base.h"
|
||||
#include "orte/mca/sensor/sensor.h"
|
||||
#include "orte/mca/state/base/base.h"
|
||||
@ -570,18 +569,6 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup the notifier system */
|
||||
if (ORTE_SUCCESS != (ret = orte_notifier_base_open())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_notifer_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_notifier_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_notifer_select";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup the SENSOR framework */
|
||||
if (ORTE_SUCCESS != (ret = orte_sensor_base_open())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
|
@ -58,7 +58,6 @@
|
||||
#include "orte/mca/plm/base/base.h"
|
||||
#include "orte/mca/plm/plm.h"
|
||||
#include "orte/mca/odls/base/base.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "orte/mca/sensor/base/base.h"
|
||||
#include "orte/mca/sensor/sensor.h"
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
@ -613,18 +612,6 @@ static int rte_init(void)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup the notifier system */
|
||||
if (ORTE_SUCCESS != (ret = orte_notifier_base_open())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_notifer_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_notifier_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_notifer_select";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup the SENSOR framework */
|
||||
if (ORTE_SUCCESS != (ret = orte_sensor_base_open())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
|
@ -1,38 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# main library setup
|
||||
noinst_LTLIBRARIES = libmca_notifier.la
|
||||
libmca_notifier_la_SOURCES =
|
||||
|
||||
# local files
|
||||
headers = notifier.h notifier_event_types.h notifier_event_calls.h
|
||||
|
||||
libmca_notifier_la_SOURCES += $(headers)
|
||||
|
||||
# Conditionally install the header files
|
||||
if WANT_INSTALL_HEADERS
|
||||
ortedir = $(includedir)/openmpi/$(subdir)
|
||||
nobase_orte_HEADERS = $(headers)
|
||||
endif
|
||||
|
||||
include base/Makefile.am
|
||||
|
||||
distclean-local:
|
||||
rm -f base/static-components.h
|
@ -1,33 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
headers += \
|
||||
base/base.h
|
||||
|
||||
libmca_notifier_la_SOURCES += \
|
||||
base/notifier_base_close.c \
|
||||
base/notifier_base_select.c \
|
||||
base/notifier_base_open.c
|
||||
|
||||
if !ORTE_DISABLE_FULL_SUPPORT
|
||||
libmca_notifier_la_SOURCES += \
|
||||
base/notifier_base_wrappers.c \
|
||||
base/notifier_base_events.c
|
||||
endif
|
||||
|
@ -1,121 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file:
|
||||
*/
|
||||
|
||||
#ifndef MCA_NOTIFIER_BASE_H
|
||||
#define MCA_NOTIFIER_BASE_H
|
||||
|
||||
/*
|
||||
* includes
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/class/opal_object.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* Type for holding selected module / component pairs
|
||||
*/
|
||||
typedef struct {
|
||||
opal_list_item_t super; /* list-item base: entries are cast from opal_list_item_t* when removed from a list */
|
||||
/* Selected notifier component */
|
||||
orte_notifier_base_component_t *onbsp_component;
|
||||
/* Module paired with that component; its finalize hook, when non-NULL, is invoked by orte_notifier_base_close() */
|
||||
orte_notifier_base_module_t *onbsp_module;
|
||||
/* Priority recorded for this pair -- NOTE(review): how it orders modules is decided in the select code, not visible here */
|
||||
int onbsp_priority;
|
||||
} orte_notifier_base_selected_pair_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(orte_notifier_base_selected_pair_t);
|
||||
|
||||
/*
|
||||
* function definitions
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_notifier_base_open(void);
|
||||
ORTE_DECLSPEC int orte_notifier_base_close(void);
|
||||
ORTE_DECLSPEC int orte_notifier_base_select(void);
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
ORTE_DECLSPEC void orte_notifier_log(orte_notifier_base_severity_t severity,
|
||||
int errcode,
|
||||
const char *msg, ...);
|
||||
ORTE_DECLSPEC void orte_notifier_show_help(orte_notifier_base_severity_t severity,
|
||||
int errcode,
|
||||
const char *file,
|
||||
const char *topic, ...);
|
||||
ORTE_DECLSPEC void orte_notifier_log_peer(orte_notifier_base_severity_t severity,
|
||||
int errcode,
|
||||
orte_process_name_t *peer_proc,
|
||||
const char *msg, ...);
|
||||
ORTE_DECLSPEC const char* orte_notifier_base_sev2str(orte_notifier_base_severity_t severity);
|
||||
ORTE_DECLSPEC char *orte_notifier_base_peer_log(int errcode,
|
||||
orte_process_name_t *peer_proc,
|
||||
const char *msg, va_list ap);
|
||||
|
||||
#if ORTE_WANT_NOTIFIER_LOG_EVENT
|
||||
|
||||
ORTE_DECLSPEC int orte_notifier_base_events_init(void);
|
||||
ORTE_DECLSPEC void orte_notifier_base_events_finalize(void);
|
||||
|
||||
#else /* ORTE_WANT_NOTIFIER_LOG_EVENT */
|
||||
|
||||
#define orte_notifier_base_events_init() do {} while (0)
|
||||
#define orte_notifier_base_events_finalize() do {} while (0)
|
||||
|
||||
#endif /* ORTE_WANT_NOTIFIER_LOG_EVENT */
|
||||
|
||||
/*
|
||||
* global variables in the base
|
||||
* Needs to be declspec'ed for ompi_info and others
|
||||
*/
|
||||
/*
|
||||
* Indication of whether a component was successfully selected or not
|
||||
* (1 component per interface)
|
||||
*/
|
||||
ORTE_DECLSPEC extern bool orte_notifier_base_log_selected;
|
||||
ORTE_DECLSPEC extern bool orte_notifier_base_help_selected;
|
||||
ORTE_DECLSPEC extern bool orte_notifier_base_log_peer_selected;
|
||||
ORTE_DECLSPEC extern bool orte_notifier_base_log_event_selected;
|
||||
/*
|
||||
* Lists of selected modules (1 per interface)
|
||||
*/
|
||||
ORTE_DECLSPEC extern opal_list_t orte_notifier_log_selected_modules;
|
||||
ORTE_DECLSPEC extern opal_list_t orte_notifier_help_selected_modules;
|
||||
ORTE_DECLSPEC extern opal_list_t orte_notifier_log_peer_selected_modules;
|
||||
ORTE_DECLSPEC extern opal_list_t orte_notifier_log_event_selected_modules;
|
||||
/*
|
||||
* This list is the merge of the per-interface lists.
|
||||
* It is used during the finalize phase so that each selected module is finalized only once.
|
||||
*/
|
||||
ORTE_DECLSPEC extern opal_list_t orte_notifier_base_selected_modules;
|
||||
ORTE_DECLSPEC extern int orte_notifier_base_output;
|
||||
ORTE_DECLSPEC extern orte_notifier_base_severity_t orte_notifier_threshold_severity;
|
||||
ORTE_DECLSPEC extern opal_list_t orte_notifier_base_components_available;
|
||||
|
||||
#endif /* !ORTE_DISABLE_FULL_SUPPORT */
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
@ -1,66 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
|
||||
int orte_notifier_base_close(void)
|
||||
{
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
opal_list_item_t *item;
|
||||
orte_notifier_base_selected_pair_t *pair;
|
||||
|
||||
if (orte_notifier_base_log_event_selected) {
|
||||
orte_notifier_base_events_finalize();
|
||||
}
|
||||
|
||||
/* Finalize all the selected modules
|
||||
* orte_notifier_base_selected_modules has been built as a merge of the
|
||||
* per interface selected modules lists, so only going through that list
|
||||
* to invoke the finalize routines is enough.
|
||||
*/
|
||||
for (item = opal_list_remove_first(&orte_notifier_base_selected_modules);
|
||||
NULL != item;
|
||||
item = opal_list_remove_first(&orte_notifier_base_selected_modules)) {
|
||||
pair = (orte_notifier_base_selected_pair_t*) item;
|
||||
if (NULL != pair->onbsp_module->finalize) {
|
||||
pair->onbsp_module->finalize();
|
||||
}
|
||||
free(pair);
|
||||
}
|
||||
OBJ_DESTRUCT(&orte_notifier_base_selected_modules);
|
||||
OBJ_DESTRUCT(&orte_notifier_log_selected_modules);
|
||||
OBJ_DESTRUCT(&orte_notifier_help_selected_modules);
|
||||
OBJ_DESTRUCT(&orte_notifier_log_peer_selected_modules);
|
||||
OBJ_DESTRUCT(&orte_notifier_log_event_selected_modules);
|
||||
|
||||
/* Close all remaining available components */
|
||||
mca_base_components_close(orte_notifier_base_output,
|
||||
&orte_notifier_base_components_available, NULL);
|
||||
#endif
|
||||
|
||||
/* All done */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
@ -1,197 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif /* HAVE_STDLIB_H */
|
||||
#ifdef HAVE_STDIO_H
|
||||
#include <stdio.h>
|
||||
#endif /* HAVE_STDIO_H */
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif /* HAVE_SYS_TIME_H */
|
||||
|
||||
#include "opal/class/opal_object.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
#if ORTE_WANT_NOTIFIER_LOG_EVENT
|
||||
|
||||
/*
|
||||
* Definitions for the events that are accounted for before being logged.
|
||||
* They are stored in a list to ensure they are all unconditionally traced
|
||||
* out during finalize.
|
||||
*/
|
||||
opal_list_t orte_notifier_events_list;
|
||||
|
||||
|
||||
/*
|
||||
* Log format differs depending on the phase we are in.
|
||||
*/
|
||||
#define ORTE_NOTIFIER_LOG_FORMAT_0 "TIME=%ld MPI_NOTIFIER_EVENT FAMILY=%u JOB=%u VPID=%u HOST=%s EVENT=%d COUNT=%u: %s"
|
||||
|
||||
#define ORTE_NOTIFIER_LOG_FORMAT_1 "TIME=%ld MPI_NOTIFIER_EVENT FAMILY=%u JOB=%u VPID=%u HOST=%s EVENT=%d COUNT=%u (in %ld seconds): %s"
|
||||
|
||||
#define ORTE_NOTIFIER_LOG_FORMAT_2 "TIME=%ld MPI_NOTIFIER_EVENT FAMILY=%u JOB=%u VPID=%u HOST=%s EVENT=%d COUNT=%u (Finalize): %s"
|
||||
|
||||
|
||||
/* Class constructor: reset the message pointer, the already-traced flag
 * and the occurrence counter of a freshly created event. */
static void orte_notifier_event_construct(orte_notifier_event_t *ev)
{
    ev->ev_msg = NULL;
    ev->ev_already_traced = 0;
    ev->ev_cnt = 0;
}
|
||||
|
||||
/* Class destructor: give back the message string held by the event, if any. */
static void orte_notifier_event_destruct(orte_notifier_event_t *ev)
{
    /* free(NULL) is a no-op, so no guard is required here. */
    free(ev->ev_msg);
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_notifier_event_t,
|
||||
opal_list_item_t,
|
||||
orte_notifier_event_construct,
|
||||
orte_notifier_event_destruct);
|
||||
|
||||
|
||||
int orte_notifier_base_events_init(void)
|
||||
{
|
||||
if (!ORTE_PROC_IS_HNP) {
|
||||
OBJ_CONSTRUCT(&orte_notifier_events_list, opal_list_t);
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
void orte_notifier_base_events_finalize(void)
|
||||
{
|
||||
orte_notifier_event_t *ev;
|
||||
opal_list_item_t *item;
|
||||
int32_t count;
|
||||
|
||||
if (ORTE_PROC_IS_HNP) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unconditionally trace any event that has been accounted for
|
||||
*/
|
||||
for (item = opal_list_remove_first(&orte_notifier_events_list);
|
||||
NULL != item;
|
||||
item = opal_list_remove_first(&orte_notifier_events_list)) {
|
||||
ev = (orte_notifier_event_t *) item;
|
||||
if ((count = ev->ev_cnt) && notifier_log_event_enabled()) {
|
||||
notifier_trace_event(ORTE_NOTIFIER_LOG_2, ev->ev_id, count,
|
||||
time(NULL), 0, ev->ev_msg);
|
||||
}
|
||||
OBJ_RELEASE(ev);
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&orte_notifier_events_list);
|
||||
}
|
||||
|
||||
/*
|
||||
* log_type indicates whether we are tracing one of the following:
|
||||
* . ORTE_NOTIFIER_LOG_0 --> Very first trace
|
||||
* . ORTE_NOTIFIER_LOG_1 --> Intermediate trace
|
||||
* . ORTE_NOTIFIER_LOG_2 --> during finalize
|
||||
* Depending on the log_type the output format is different.
|
||||
*/
|
||||
void notifier_trace_event(int log_type, int ev_id, int32_t count, time_t t,
|
||||
time_t delay, const char *msg)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
orte_notifier_base_selected_pair_t *pair;
|
||||
orte_process_name_t *pname = ORTE_PROC_MY_NAME;
|
||||
char *out = NULL;
|
||||
|
||||
switch (log_type) {
|
||||
case ORTE_NOTIFIER_LOG_0:
|
||||
asprintf(&out, ORTE_NOTIFIER_LOG_FORMAT_0, t,
|
||||
ORTE_JOB_FAMILY(pname->jobid),
|
||||
ORTE_LOCAL_JOBID(pname->jobid),
|
||||
pname->vpid,
|
||||
orte_process_info.nodename,
|
||||
ev_id,
|
||||
count,
|
||||
msg);
|
||||
break;
|
||||
case ORTE_NOTIFIER_LOG_1:
|
||||
asprintf(&out, ORTE_NOTIFIER_LOG_FORMAT_1, t,
|
||||
ORTE_JOB_FAMILY(pname->jobid),
|
||||
ORTE_LOCAL_JOBID(pname->jobid),
|
||||
pname->vpid,
|
||||
orte_process_info.nodename,
|
||||
ev_id,
|
||||
count,
|
||||
delay,
|
||||
msg);
|
||||
break;
|
||||
case ORTE_NOTIFIER_LOG_2:
|
||||
asprintf(&out, ORTE_NOTIFIER_LOG_FORMAT_2, t,
|
||||
ORTE_JOB_FAMILY(pname->jobid),
|
||||
ORTE_LOCAL_JOBID(pname->jobid),
|
||||
pname->vpid,
|
||||
orte_process_info.nodename,
|
||||
ev_id,
|
||||
count,
|
||||
msg);
|
||||
break;
|
||||
default:
|
||||
asprintf(&out, "UNKNOWN!!!!!!!!!");
|
||||
break;
|
||||
}
|
||||
|
||||
if (NULL == out) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (item = opal_list_get_first(&orte_notifier_log_event_selected_modules);
|
||||
opal_list_get_end(&orte_notifier_log_event_selected_modules) != item;
|
||||
item = opal_list_get_next(item)) {
|
||||
pair = (orte_notifier_base_selected_pair_t*) item;
|
||||
if (NULL != pair->onbsp_module->log_event) {
|
||||
pair->onbsp_module->log_event(out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Append an event to the tail of orte_notifier_events_list. */
void notifier_event_store(orte_notifier_event_t *ev)
{
    opal_list_item_t *link = &(ev->super);

    opal_list_append(&orte_notifier_events_list, link);
}
|
||||
|
||||
bool notifier_log_event_enabled(void)
|
||||
{
|
||||
return orte_notifier_base_log_event_selected &&
|
||||
(ORTE_NOTIFIER_NOTICE <= orte_notifier_threshold_severity);
|
||||
}
|
||||
|
||||
#endif /* ORTE_WANT_NOTIFIER_LOG_EVENT */
|
@ -1,123 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
/*
|
||||
* The following file was created by configure. It contains extern
|
||||
* statements and the definition of an array of pointers to each
|
||||
* component's public mca_base_component_t struct.
|
||||
*/
|
||||
|
||||
#include "orte/mca/notifier/base/static-components.h"
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
|
||||
/*
|
||||
* Global variables
|
||||
*/
|
||||
int orte_notifier_base_output = -1;
|
||||
orte_notifier_base_severity_t orte_notifier_threshold_severity =
|
||||
ORTE_NOTIFIER_ERROR;
|
||||
opal_list_t orte_notifier_base_components_available;
|
||||
opal_list_t orte_notifier_base_selected_modules;
|
||||
opal_list_t orte_notifier_log_selected_modules;
|
||||
opal_list_t orte_notifier_help_selected_modules;
|
||||
opal_list_t orte_notifier_log_peer_selected_modules;
|
||||
opal_list_t orte_notifier_log_event_selected_modules;
|
||||
|
||||
orte_notifier_API_module_t orte_notifier = {
|
||||
orte_notifier_log,
|
||||
orte_notifier_show_help,
|
||||
orte_notifier_log_peer,
|
||||
};
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Function for finding and opening either all MCA components, or the one
|
||||
* that was specifically requested via a MCA parameter.
|
||||
*/
|
||||
int orte_notifier_base_open(void)
|
||||
{
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
char *level;
|
||||
|
||||
/* Debugging / verbose output. Always have stream open, with
|
||||
verbose set by the mca open system... */
|
||||
orte_notifier_base_output = opal_output_open(NULL);
|
||||
|
||||
/* let the user define a base level of severity to report */
|
||||
mca_base_param_reg_string_name("notifier", "threshold_severity",
|
||||
"Report all events at or above this severity [default: error]",
|
||||
false, false, "error", &level);
|
||||
if (0 == strncasecmp(level, "emerg", strlen("emerg"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_EMERG;
|
||||
} else if (0 == strncasecmp(level, "alert", strlen("alert"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_ALERT;
|
||||
} else if (0 == strncasecmp(level, "crit", strlen("crit"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_CRIT;
|
||||
} else if (0 == strncasecmp(level, "warn", strlen("warn"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_WARN;
|
||||
} else if (0 == strncasecmp(level, "notice", strlen("notice"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_NOTICE;
|
||||
} else if (0 == strncasecmp(level, "info", strlen("info"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_INFO;
|
||||
} else if (0 == strncasecmp(level, "debug", strlen("debug"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_DEBUG;
|
||||
} else if (0 != strncasecmp(level, "error", strlen("error"))) {
|
||||
opal_output(0, "Unknown notifier level");
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
free(level);
|
||||
|
||||
OBJ_CONSTRUCT(&orte_notifier_base_selected_modules, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_notifier_log_selected_modules, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_notifier_help_selected_modules, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_notifier_log_peer_selected_modules, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_notifier_log_event_selected_modules, opal_list_t);
|
||||
|
||||
/* Open up all available components */
|
||||
|
||||
if (ORTE_SUCCESS !=
|
||||
mca_base_components_open("notifier", orte_notifier_base_output,
|
||||
mca_notifier_base_static_components,
|
||||
&orte_notifier_base_components_available,
|
||||
true)) {
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
/* All done */
|
||||
#endif
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
@ -1,392 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
|
||||
/* Global variables */
|
||||
/*
|
||||
* orte_notifier_base_XXX_selected is set to true if at least 1 module has
|
||||
* been selected for the notifier XXX API interface.
|
||||
*/
|
||||
bool orte_notifier_base_log_selected = false;
|
||||
bool orte_notifier_base_help_selected = false;
|
||||
bool orte_notifier_base_log_peer_selected = false;
|
||||
bool orte_notifier_base_log_event_selected = false;
|
||||
|
||||
static inline char **orte_notifier_get_include_list(const char *,
|
||||
const char *,
|
||||
char **);
|
||||
static bool orte_notifier_add_module(mca_base_component_t *component,
|
||||
orte_notifier_base_module_t *module,
|
||||
int priority,
|
||||
char **include_list,
|
||||
opal_list_t *selected_modules);
|
||||
|
||||
/* Class constructor for a selected (component, module) pair: no module,
 * no component, priority -1. */
static void onbsp_construct(orte_notifier_base_selected_pair_t *obj)
{
    obj->onbsp_priority = -1;
    obj->onbsp_module = NULL;
    obj->onbsp_component = NULL;
}
|
||||
|
||||
/* Class destructor for a selected pair: put the fields back to the same
 * values the constructor assigns. */
static void onbsp_destruct(orte_notifier_base_selected_pair_t *obj)
{
    obj->onbsp_component = NULL;
    obj->onbsp_module = NULL;
    obj->onbsp_priority = -1;
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_notifier_base_selected_pair_t,
|
||||
opal_list_item_t,
|
||||
onbsp_construct,
|
||||
onbsp_destruct);
|
||||
|
||||
|
||||
/**
|
||||
* Function for selecting a set of components from all those that are
|
||||
* available.
|
||||
*
|
||||
* It is possible to select a subset of these components for any interface.
|
||||
* The syntax is the following:
|
||||
* [ -mca notifier <list0> ] [ -mca notifier_log <list1> ]
|
||||
* [ -mca notifier_help <list2> ]
|
||||
* [ -mca notifier_log_peer <list3> ]
|
||||
* [ -mca notifier_log_event <list4> ]
|
||||
* Rules:
|
||||
* . <list0> empty means nothing selected
|
||||
* . <list0> to <list4> = comma separated lists of component names
|
||||
* . <list1> to <list4> may be one of:
|
||||
* . subsets of <list0>
|
||||
* . "none" keyword (means empty)
|
||||
* . 1 of <list1> to <list4> empty means = <list0>
|
||||
* Last point makes it possible to preserve the way it works today
|
||||
*
|
||||
* Examples:
|
||||
* 1)
|
||||
* -mca notifier syslog,smtp
|
||||
* --> syslog and smtp are selected for the log, show_help, log_peer and
|
||||
* log_event interfaces.
|
||||
* 2)
|
||||
* -mca notifier_log syslog
|
||||
* --> no interface is activated, no component is selected
|
||||
* 3)
|
||||
* -mca notifier syslog -mca notifier_help none
|
||||
* -mca notifier_log_peer none
|
||||
* -mca notifier_log_event none
|
||||
* --> only the log interface is activated, with the syslog component
|
||||
* 4)
|
||||
* -mca notifier syslog,smtp,hnp -mca notifier_help syslog
|
||||
* -mca notifier_log_peer smtp
|
||||
* -mca notifier_log_event none
|
||||
* --> the log interface is activated, with the syslog, smtp and hnp
|
||||
* components
|
||||
* the log_help interface is activated, with the syslog component
|
||||
* the log_peer interface is activated, with the smtp component
|
||||
* the log_event interface is not activated
|
||||
*/
|
||||
int orte_notifier_base_select(void)
|
||||
{
|
||||
mca_base_component_list_item_t *cli = NULL;
|
||||
mca_base_component_t *component = NULL;
|
||||
mca_base_module_t *module = NULL;
|
||||
int i, ret, priority, exit_status = ORTE_SUCCESS;
|
||||
opal_list_item_t *item;
|
||||
orte_notifier_base_module_t *nmodule;
|
||||
char **imodules;
|
||||
char **imodules_log, **imodules_help, **imodules_log_peer;
|
||||
char **imodules_log_event = NULL;
|
||||
bool module_needed;
|
||||
|
||||
/*
|
||||
* Register the framework MCA param and look up include list
|
||||
*/
|
||||
imodules = orte_notifier_get_include_list("notifier",
|
||||
"Comma-delimisted list of notifier components to use "
|
||||
"(empty = none)", NULL);
|
||||
if (NULL == imodules) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Also get the include lists for each interface
|
||||
*/
|
||||
imodules_log = orte_notifier_get_include_list("notifier_log",
|
||||
"Comma-delimisted list of notifier components to use "
|
||||
"for orte_notifier_log (empty = all selected)",
|
||||
imodules);
|
||||
|
||||
imodules_help = orte_notifier_get_include_list("notifier_help",
|
||||
"Comma-delimisted list of notifier components to use "
|
||||
"for orte_notifier_show_help (empty = all selected)",
|
||||
imodules);
|
||||
|
||||
imodules_log_peer = orte_notifier_get_include_list("notifier_log_peer",
|
||||
"Comma-delimisted list of notifier components to "
|
||||
"use for orte_notifier_log_peer (empty = all "
|
||||
"selected)", imodules);
|
||||
|
||||
#if ORTE_WANT_NOTIFIER_LOG_EVENT
|
||||
imodules_log_event = orte_notifier_get_include_list("notifier_log_event",
|
||||
"Comma-delimisted list of notifier components to "
|
||||
"use for ORTE_NOTIFIER_LOG_EVENT (empty = all "
|
||||
"selected)",
|
||||
imodules);
|
||||
#endif /* ORTE_WANT_NOTIFIER_LOG_EVENT */
|
||||
|
||||
/* Query all available components and ask if they have a module */
|
||||
for (item = opal_list_get_first(&orte_notifier_base_components_available);
|
||||
opal_list_get_end(&orte_notifier_base_components_available) != item;
|
||||
item = opal_list_get_next(item)) {
|
||||
cli = (mca_base_component_list_item_t *) item;
|
||||
component = (mca_base_component_t *) cli->cli_component;
|
||||
|
||||
/* If this component is not in the include list, skip it */
|
||||
for (i = 0; NULL != imodules[i]; ++i) {
|
||||
if (0 == strcmp(imodules[i], component->mca_component_name)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL == imodules[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If there's no query function, skip it */
|
||||
if (NULL == component->mca_query_component) {
|
||||
opal_output_verbose(5, orte_notifier_base_output,
|
||||
"mca:notify:select: Skipping component [%s]. It does not implement a query function",
|
||||
component->mca_component_name );
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Query the component */
|
||||
opal_output_verbose(5, orte_notifier_base_output,
|
||||
"mca:notify:select: Querying component [%s]",
|
||||
component->mca_component_name);
|
||||
ret = component->mca_query_component(&module, &priority);
|
||||
|
||||
/* If no module was returned, then skip component */
|
||||
if (ORTE_SUCCESS != ret || NULL == module) {
|
||||
opal_output_verbose(5, orte_notifier_base_output,
|
||||
"mca:notify:select: Skipping component [%s]. Query failed to return a module",
|
||||
component->mca_component_name );
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If we got a module, initialize it */
|
||||
nmodule = (orte_notifier_base_module_t*) module;
|
||||
if (NULL != nmodule->init) {
|
||||
/* If the module doesn't want to be used, skip it */
|
||||
if (ORTE_SUCCESS != (ret = nmodule->init()) ) {
|
||||
if (ORTE_ERR_NOT_SUPPORTED != ret &&
|
||||
ORTE_ERR_NOT_IMPLEMENTED != ret) {
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (NULL != nmodule->finalize) {
|
||||
nmodule->finalize();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* OK, one module has been selected for the notifier framework, and
|
||||
* successfully initialized.
|
||||
* Now we have to include it in the per interface selected modules
|
||||
* lists if needed.
|
||||
*/
|
||||
ret = orte_notifier_add_module(component,
|
||||
nmodule,
|
||||
priority,
|
||||
imodules_log,
|
||||
&orte_notifier_log_selected_modules);
|
||||
|
||||
orte_notifier_base_log_selected = orte_notifier_base_log_selected
|
||||
|| ret;
|
||||
/*
|
||||
* This variable is set to check if the module is needed by at least
|
||||
* one interface.
|
||||
*/
|
||||
module_needed = ret;
|
||||
|
||||
ret = orte_notifier_add_module(component,
|
||||
nmodule,
|
||||
priority,
|
||||
imodules_help,
|
||||
&orte_notifier_help_selected_modules);
|
||||
orte_notifier_base_help_selected = orte_notifier_base_help_selected
|
||||
|| ret;
|
||||
module_needed = module_needed || ret;
|
||||
|
||||
ret = orte_notifier_add_module(component,
|
||||
nmodule,
|
||||
priority,
|
||||
imodules_log_peer,
|
||||
&orte_notifier_log_peer_selected_modules);
|
||||
orte_notifier_base_log_peer_selected =
|
||||
orte_notifier_base_log_peer_selected || ret;
|
||||
module_needed = module_needed || ret;
|
||||
|
||||
ret = orte_notifier_add_module(component,
|
||||
nmodule,
|
||||
priority,
|
||||
imodules_log_event,
|
||||
&orte_notifier_log_event_selected_modules);
|
||||
orte_notifier_base_log_event_selected =
|
||||
orte_notifier_base_log_event_selected || ret;
|
||||
module_needed = module_needed || ret;
|
||||
|
||||
/*
|
||||
* If the module is needed by at least one interface:
|
||||
* Unconditionally update the global list that will be used during
|
||||
* the close step. Else unload it.
|
||||
*/
|
||||
if (module_needed) {
|
||||
orte_notifier_add_module(component,
|
||||
nmodule,
|
||||
priority,
|
||||
imodules,
|
||||
&orte_notifier_base_selected_modules);
|
||||
} else {
|
||||
if (NULL != nmodule->finalize) {
|
||||
nmodule->finalize();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (orte_notifier_base_log_event_selected) {
|
||||
/*
|
||||
* This has to be done whatever the selected module. That's why it's
|
||||
* done here.
|
||||
*/
|
||||
orte_notifier_base_events_init();
|
||||
}
|
||||
|
||||
cleanup:
|
||||
return exit_status;
|
||||
}
|
||||
|
||||
/**
 * Register an mca param that represents an include list and build that list.
 *
 * @param param_name      (IN) param name to be registered
 * @param help_message    (IN) help message for that param
 * @param default_modules (IN) list of module names to be inherited if an
 *                             empty include list is provided
 * @return list of modules names: default_modules itself when nothing was
 *         specified, NULL when the value is exactly "none", otherwise a
 *         newly split argv array
 */
static inline char **orte_notifier_get_include_list(const char *param_name,
                                                    const char *help_message,
                                                    char **default_modules)
{
    char *include_list = NULL;
    char **imodules;
    char **result;

    mca_base_param_reg_string_name(param_name, NULL, help_message,
                                   false, false, NULL, &include_list);
    imodules = opal_argv_split(include_list, ',');

    if (NULL == imodules) {
        /* Inherit the default list if nothing specified */
        result = default_modules;
    } else if (0 == strcmp(include_list, "none")) {
        /* "none" disables the interface; the split array is not needed */
        opal_argv_free(imodules);
        result = NULL;
    } else {
        result = imodules;
    }

    /* The parameter value is a private copy (its other caller frees it
     * too), and the split array does not reference it any more. */
    free(include_list);

    return result;
}
|
||||
|
||||
|
||||
/**
|
||||
* Check if a component name belongs to an include list and add it to the
|
||||
* list of selected modules.
|
||||
*
|
||||
* @param component (IN) component to be included
|
||||
* @param module (IN) module to be included
|
||||
* @param priority (IN) module priority
|
||||
* @param include_list (IN) list of module names to go through
|
||||
* @param selected_modules (OUT) list of selected modules to be updated
|
||||
* @return true/false depending on whether the module
|
||||
* has been added or not
|
||||
*/
|
||||
static bool orte_notifier_add_module(mca_base_component_t *component,
|
||||
orte_notifier_base_module_t *module,
|
||||
int priority,
|
||||
char **include_list,
|
||||
opal_list_t *selected_modules)
|
||||
{
|
||||
orte_notifier_base_selected_pair_t *pair, *pair2;
|
||||
char *module_name;
|
||||
opal_list_item_t *item;
|
||||
int i;
|
||||
|
||||
if (NULL == include_list) {
|
||||
return false;
|
||||
}
|
||||
|
||||
module_name = component->mca_component_name;
|
||||
|
||||
/* If this component is not in the include list, skip it */
|
||||
for (i = 0; NULL != include_list[i]; i++) {
|
||||
if (!strcmp(include_list[i], module_name)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL == include_list[i]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Make an item for the list */
|
||||
pair = OBJ_NEW(orte_notifier_base_selected_pair_t);
|
||||
pair->onbsp_component = (orte_notifier_base_component_t*) component;
|
||||
pair->onbsp_module = module;
|
||||
pair->onbsp_priority = priority;
|
||||
|
||||
/* Put it in the list in priority order */
|
||||
for (item = opal_list_get_first(selected_modules);
|
||||
opal_list_get_end(selected_modules) != item;
|
||||
item = opal_list_get_next(item)) {
|
||||
pair2 = (orte_notifier_base_selected_pair_t*) item;
|
||||
if (priority > pair2->onbsp_priority) {
|
||||
opal_list_insert_pos(selected_modules, item, &(pair->super));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (opal_list_get_end(selected_modules) == item) {
|
||||
opal_list_append(selected_modules, &(pair->super));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
@ -1,193 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/constants.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/util/error_strings.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
|
||||
/*
 * Forward a printf-style message to every module selected for the log
 * interface.  Nothing is done when no module is selected or when the
 * severity value is above the threshold (lower severity values are
 * considered "more severe").
 */
void orte_notifier_log(orte_notifier_base_severity_t severity,
                       int errcode, const char *msg, ...)
{
    opal_list_t *modules = &orte_notifier_log_selected_modules;
    orte_notifier_base_selected_pair_t *entry;
    opal_list_item_t *cur;
    va_list args;

    if (!orte_notifier_base_log_selected ||
        severity > orte_notifier_threshold_severity) {
        return;
    }

    cur = opal_list_get_first(modules);
    while (opal_list_get_end(modules) != cur) {
        entry = (orte_notifier_base_selected_pair_t *) cur;
        if (NULL != entry->onbsp_module->log) {
            /* the argument list is restarted for each module */
            va_start(args, msg);
            entry->onbsp_module->log(severity, errcode, msg, args);
            va_end(args);
        }
        cur = opal_list_get_next(cur);
    }
}
|
||||
|
||||
/*
 * Forward a help-file topic (plus its variadic arguments) to every module
 * selected for the help interface.  Nothing is done when no module is
 * selected or when the severity value is above the threshold (lower
 * severity values are considered "more severe").
 */
void orte_notifier_show_help(orte_notifier_base_severity_t severity,
                             int errcode, const char *file,
                             const char *topic, ...)
{
    opal_list_t *modules = &orte_notifier_help_selected_modules;
    orte_notifier_base_selected_pair_t *entry;
    opal_list_item_t *cur;
    va_list args;

    if (!orte_notifier_base_help_selected ||
        severity > orte_notifier_threshold_severity) {
        return;
    }

    cur = opal_list_get_first(modules);
    while (opal_list_get_end(modules) != cur) {
        entry = (orte_notifier_base_selected_pair_t *) cur;
        if (NULL != entry->onbsp_module->help) {
            /* the argument list is restarted for each module */
            va_start(args, topic);
            entry->onbsp_module->help(severity, errcode, file, topic, args);
            va_end(args);
        }
        cur = opal_list_get_next(cur);
    }
}
|
||||
|
||||
void orte_notifier_log_peer(orte_notifier_base_severity_t severity,
|
||||
int errcode,
|
||||
orte_process_name_t *peer_proc,
|
||||
const char *msg, ...)
|
||||
{
|
||||
va_list ap;
|
||||
opal_list_item_t *item;
|
||||
orte_notifier_base_selected_pair_t *pair;
|
||||
|
||||
if (!orte_notifier_base_log_peer_selected) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (item = opal_list_get_first(&orte_notifier_log_peer_selected_modules);
|
||||
opal_list_get_end(&orte_notifier_log_peer_selected_modules) != item;
|
||||
item = opal_list_get_next(item)) {
|
||||
pair = (orte_notifier_base_selected_pair_t*) item;
|
||||
if (NULL != pair->onbsp_module->peer) {
|
||||
va_start(ap, msg);
|
||||
pair->onbsp_module->peer(severity, errcode, peer_proc, msg, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Map a severity value to its upper-case name.
 *
 * @return a constant string; "UNKNOWN" for any value not listed below.
 */
const char* orte_notifier_base_sev2str(orte_notifier_base_severity_t severity)
{
    const char *label = "UNKNOWN";

    switch (severity) {
    case ORTE_NOTIFIER_EMERG:  label = "EMERG";  break;
    case ORTE_NOTIFIER_ALERT:  label = "ALERT";  break;
    case ORTE_NOTIFIER_CRIT:   label = "CRIT";   break;
    case ORTE_NOTIFIER_ERROR:  label = "ERROR";  break;
    case ORTE_NOTIFIER_WARN:   label = "WARN";   break;
    case ORTE_NOTIFIER_NOTICE: label = "NOTICE"; break;
    case ORTE_NOTIFIER_INFO:   label = "INFO";   break;
    case ORTE_NOTIFIER_DEBUG:  label = "DEBUG";  break;
    default:                                     break;
    }

    return label;
}
|
||||
|
||||
|
||||
/*
 * Account for the result of one snprintf() call into the peer-log
 * buffer: advance the write position and shrink the remaining space.
 * A negative result (output error) appends nothing; a result that did
 * not fit consumes all remaining space so later appends are skipped.
 */
static void peer_log_consume(int written, char **pos, int *space)
{
    if (written < 0) {
        /* Nothing usable was produced; keep the string terminated here */
        **pos = '\0';
        return;
    }
    if (written > *space) {
        written = *space;
    }
    *pos += written;
    *space -= written;
}

/**
 * Build a human-readable message describing an error that occurred
 * while communicating with a peer process.
 *
 * The message consists of a fixed prefix naming both processes and
 * their nodes, the error (as text when orte_err2str() succeeds,
 * otherwise as a number), and finally the caller's formatted message.
 * Output is truncated to ORTE_NOTIFIER_MAX_BUF characters.
 *
 * @param errcode    Error code to describe.
 * @param peer_proc  Peer process, or NULL (printed as "UNKNOWN").
 * @param msg        printf-style format string.
 * @param ap         Arguments for msg.
 * @return A malloc'ed, NUL-terminated string that the caller must
 *         free, or NULL if the buffer could not be allocated.
 */
char *orte_notifier_base_peer_log(int errcode, orte_process_name_t *peer_proc,
                                  const char *msg, va_list ap)
{
    char *buf = (char *) malloc(ORTE_NOTIFIER_MAX_BUF + 1);
    char *peer_host = NULL, *peer_name = NULL;
    char *pos = buf;
    char *errstr;
    int ret, len, space = ORTE_NOTIFIER_MAX_BUF;

    if (NULL == buf) {
        return NULL;
    }
    buf[0] = '\0';

    if (peer_proc) {
        peer_host = orte_ess.proc_get_hostname(peer_proc);
        peer_name = ORTE_NAME_PRINT(peer_proc);
    }

    len = snprintf(pos, space,
                   "While communicating to proc %s on node %s,"
                   " proc %s on node %s encountered an error ",
                   peer_name ? peer_name : "UNKNOWN",
                   peer_host ? peer_host : "UNKNOWN",
                   ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                   orte_process_info.nodename);
    /* snprintf() may report an error (< 0) or a length larger than the
       space available; never let pos leave the buffer. */
    peer_log_consume(len, &pos, &space);

    if (0 < space) {
        ret = orte_err2str(errcode, (const char **)&errstr);
        if (ORTE_SUCCESS == ret) {
            len = snprintf(pos, space, "'%s':", errstr);
            /* NOTE(review): this frees the string handed back by
               orte_err2str(); confirm that function really transfers
               ownership of a heap string. */
            free(errstr);
        } else {
            len = snprintf(pos, space, "(%d):", errcode);
        }
        peer_log_consume(len, &pos, &space);
    }

    if (0 < space) {
        vsnprintf(pos, space, msg, ap);
    }

    /* Belt and braces: the buffer has one byte more than `space` */
    buf[ORTE_NOTIFIER_MAX_BUF] = '\0';
    return buf;
}
|
||||
|
||||
#endif
|
@ -1,49 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Help text shipped in the package data directory
dist_pkgdata_DATA = help-orte-notifier-command.txt

# Files that make up the command notifier component
sources = \
        notifier_command.h \
        notifier_command_fd.c \
        notifier_command_child.c \
        notifier_command_module.c \
        notifier_command_component.c

# The component is built in this directory either as a DSO named
# mca_<type>_<name>.la, or as a convenience library named
# libmca_<type>_<name>.la for static builds.

if MCA_BUILD_orte_notifier_command_DSO
component_noinst =
component_install = mca_notifier_command.la
else
component_noinst = libmca_notifier_command.la
component_install =
endif

# DSO flavor
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_notifier_command_la_SOURCES = $(sources)
mca_notifier_command_la_LDFLAGS = -module -avoid-version

# Static (not installed) flavor
noinst_LTLIBRARIES = $(component_noinst)
libmca_notifier_command_la_SOURCES = $(sources)
libmca_notifier_command_la_LDFLAGS = -module -avoid-version
|
@ -1,36 +0,0 @@
|
||||
# -*- command-script -*-
|
||||
#
|
||||
# Copyright (c) 2007 Sandia National Laboratories. All rights reserved.
|
||||
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_orte_notifier_command_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Decide whether the command notifier component can be built.  It
# needs fork, pipe, thread support and full ORTE support.
AC_DEFUN([MCA_orte_notifier_command_CONFIG], [
    AC_CONFIG_FILES([orte/mca/notifier/command/Makefile])

    OPAL_VAR_SCOPE_PUSH(notifier_happy)

    notifier_happy=no

    # We need fork and pipe
    AC_CHECK_FUNC([fork],
                  [AC_CHECK_FUNC([pipe], [notifier_happy=yes])])

    # We also need thread support.  Explicitly turn the component off
    # when there is none; otherwise the result of the fork and pipe
    # check above would stay in effect and the requirement would
    # never be enforced.
    AS_IF([test "$notifier_happy" = "yes"],
          [AC_MSG_CHECKING([for thread support])
           AC_MSG_RESULT([$THREAD_TYPE])
           AS_IF([test "$THREAD_TYPE" != "none"],
                 [notifier_happy=yes],
                 [notifier_happy=no])])

    AS_IF([test "$notifier_happy" = "yes" -a "$orte_without_full_support" = 0], [$1], [$2])
    OPAL_VAR_SCOPE_POP
])
|
@ -1,66 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English help file for Open MPI's command notifier support
|
||||
#
|
||||
[command not specified]
|
||||
Error: the Open MPI command notifier component had no command specified.
|
||||
#
|
||||
[bad command]
|
||||
Error: the command notifier component received a bad command in the
|
||||
notifier_command_cmd MCA parameter. This usually means that there
|
||||
are mismatched quotes in the command string. Your MPI job may
|
||||
continue, but the command notifier has been disabled.
|
||||
|
||||
Local host: %s
|
||||
Command: %s
|
||||
#
|
||||
[system call fail]
|
||||
Error: a system call failed during the setup of the command notifier
|
||||
component. Open MPI is now going to abort your job.
|
||||
|
||||
Local host: %s
|
||||
System call: %s
|
||||
Errno: %s (%d)
|
||||
#
|
||||
[grandchild fail]
|
||||
The command notifier process died with a non-zero exit status. This
|
||||
should not happen. Your MPI job will continue, however, and
|
||||
notifications will attempt to continue. But you may only see this
|
||||
message once, even if notifications continue to fail.
|
||||
|
||||
Local host: %s
|
||||
Command: %s
|
||||
Exit status: %s %d
|
||||
#
|
||||
[grandchild did not exit]
|
||||
ERROR: The command notifier process took too long, but was unable to be
|
||||
killed by Open MPI (Open MPI tried killing it with SIGTERM and
|
||||
SIGKILL). This should not happen; you should both check the host
|
||||
where this occurred to see if there are any notifier processes still
|
||||
running, and check your notifier command and ensure that it is
|
||||
functioning properly. Your MPI job will continue, however, and
|
||||
notifications will attempt to continue. But you may only see this
|
||||
message once, even if notifications continue to fail.
|
||||
|
||||
Local host: %s
|
||||
Command: %s
|
||||
Timeout (sec): %d
|
||||
#
|
||||
[grandchild timeout]
|
||||
The command notifier process took too long and was killed by Open MPI.
|
||||
This should not happen; you should check your notifier command and
|
||||
ensure that it is functioning properly. Your MPI job will continue,
|
||||
however, and notifications will attempt to continue. But you may only
|
||||
see this message once, even if notifications continue to fail.
|
||||
|
||||
Local host: %s
|
||||
Command: %s
|
||||
Timeout (sec): %d
|
||||
Exit status: %s %d
|
@ -1,107 +0,0 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
#ifndef NOTIFIER_COMMAND_H
|
||||
#define NOTIFIER_COMMAND_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef struct {
|
||||
orte_notifier_base_component_t super;
|
||||
|
||||
/* Command to execute */
|
||||
char *cmd;
|
||||
|
||||
/* Timeout of the command (seconds) */
|
||||
int timeout;
|
||||
|
||||
/* Priority of this component */
|
||||
int priority;
|
||||
|
||||
/* Child PID */
|
||||
pid_t child_pid;
|
||||
|
||||
/* Pipe to the child */
|
||||
int to_child[2];
|
||||
|
||||
/* Pipe to the parent */
|
||||
int to_parent[2];
|
||||
|
||||
/* Do we want data sent to child via stdin? */
|
||||
bool pass_via_stdin;
|
||||
} orte_notifier_command_component_t;
|
||||
|
||||
|
||||
/*
|
||||
* Notifier interfaces
|
||||
*/
|
||||
ORTE_MODULE_DECLSPEC extern orte_notifier_command_component_t
|
||||
mca_notifier_command_component;
|
||||
extern orte_notifier_base_module_t orte_notifier_command_module;
|
||||
|
||||
/*
|
||||
* Pipe commands
|
||||
*/
|
||||
/* Requests that are sent down the pipe to the child process */
typedef enum {
    /* Run (fork/exec) the notifier command */
    CMD_EXEC,

    /* The child should terminate */
    CMD_TIME_TO_QUIT,

    /* Not a real request: marks the end of the list */
    CMD_MAX
} orte_notifier_command_pipe_cmd_t;
|
||||
|
||||
|
||||
/**
|
||||
* Simple blocking function to read a specific number of bytes from an
|
||||
* fd.
|
||||
*/
|
||||
int orte_notifier_command_read_fd(int fd, int len, void *buffer);
|
||||
|
||||
/**
|
||||
* Simple blocking function to write a specific number of bytes to an
|
||||
* fd.
|
||||
*/
|
||||
int orte_notifier_command_write_fd(int fd, int len, void *buffer);
|
||||
|
||||
/**
|
||||
* Main entry point for child
|
||||
*/
|
||||
void orte_notifier_command_child_main(void) __opal_attribute_noreturn__;
|
||||
|
||||
/**
|
||||
* Function to split a string into argv, honoring quoting, etc. (and do
|
||||
* some error checking of the string)
|
||||
*/
|
||||
int orte_notifier_command_split(const char *cmd, char ***argv);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
@ -1,446 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/**
|
||||
* Note: this file is a little fast-n-loose with OPAL_HAVE_THREADS --
|
||||
* it uses this value in run-time "if" conditionals (vs. compile-time
|
||||
* #if conditionals). We also don't protect including <pthread.h>.
|
||||
* That's because this component currently only compiles on Linux and
|
||||
* Solaris, and both of these OS's have pthreads. Using the run-time
|
||||
* conditionals gives us better compile-time checking, even of code
|
||||
* that isn't activated.
|
||||
*
|
||||
* Note, too, that the functionality in this file does *not* require
|
||||
* all the heavyweight OMPI thread infrastructure (e.g., from
|
||||
* --enable-mpi-thread-multiple or --enable-progress-threads). All work that
|
||||
* is done in a separate progress thread is very carefully segregated
|
||||
* from that of the main thread, and communication back to the main
|
||||
* thread
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <string.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_SYS_WAIT_H
|
||||
#include <sys/wait.h>
|
||||
#endif
|
||||
#ifdef HAVE_TIME_H
|
||||
#include <time.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
#include <signal.h>
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/threads/threads.h"
|
||||
|
||||
#include "orte/constants.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
#include "notifier_command.h"
|
||||
|
||||
static void diediedie(int status) __opal_attribute_noreturn__;
|
||||
|
||||
/* Argument bundle handed to stdin_main() through the thread object */
typedef struct {
    /* Pipe fd the notification is written to */
    int sat_pipe_fd;
    /* Severity of the notification */
    int sat_severity;
    /* Error code of the notification */
    int sat_errcode;
    /* Notification message text */
    char *sat_msg;
} stdin_arg_t;
|
||||
|
||||
|
||||
int orte_notifier_command_split(const char *cmd_arg, char ***argv_arg)
|
||||
{
|
||||
int i;
|
||||
char *cmd, *p, *q, *token_start, **argv = NULL;
|
||||
bool in_space, in_quote, in_2quote;
|
||||
|
||||
*argv_arg = NULL;
|
||||
cmd = strdup(cmd_arg);
|
||||
if (NULL == cmd) {
|
||||
return ORTE_ERR_IN_ERRNO;
|
||||
}
|
||||
|
||||
in_space = in_quote = in_2quote = false;
|
||||
for (token_start = p = cmd; '\0' != *p; ++p) {
|
||||
/* If we're in a quoted string, all we're doing it looking for
|
||||
the matching end quote. Note that finding the end quote
|
||||
does not necessarily mean the end of the token! So use the
|
||||
normal "I found a space [outside of a quote]" processing to
|
||||
find the end of the token. */
|
||||
if (in_quote &&
|
||||
('\'' == *p && p > token_start && '\\' != *(p - 1))) {
|
||||
in_quote = false;
|
||||
} else if (in_2quote &&
|
||||
('\"' == *p && p > token_start && '\\' != *(p - 1))) {
|
||||
in_2quote = false;
|
||||
}
|
||||
|
||||
/* If we hit a space, it could be the end of a token -- unless
|
||||
we're already in a series of spaces. */
|
||||
else if (!in_quote && !in_2quote && isspace(*p)) {
|
||||
if (!in_space) {
|
||||
/* We weren't in a series of spaces, so this was the
|
||||
end of a token. Save it. */
|
||||
in_space = true;
|
||||
*p = '\0';
|
||||
opal_argv_append_nosize(&argv, token_start);
|
||||
token_start = p + 1;
|
||||
} else {
|
||||
/* We're in a series of spaces, so just move
|
||||
token_start up to the next character. */
|
||||
token_start = p + 1;
|
||||
}
|
||||
} else {
|
||||
/* We're not in a series of spaces. We only need to check
|
||||
if we find ' or " to start a quoted string (in which
|
||||
case spaces no longer mark the end of a string). */
|
||||
in_space = false;
|
||||
if ('\'' == *p) {
|
||||
in_quote = true;
|
||||
} else if ('"' == *p) {
|
||||
in_2quote = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (in_quote || in_2quote) {
|
||||
free(cmd);
|
||||
opal_argv_free(argv);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* Get the last token, if there is one */
|
||||
if (!in_space) {
|
||||
opal_argv_append_nosize(&argv, token_start);
|
||||
}
|
||||
|
||||
/* Replace escapes and non-escaped quotes */
|
||||
for (i = 0; NULL != argv[i]; ++i) {
|
||||
for (p = q = argv[i]; '\0' != *p; ++p) {
|
||||
if ('\\' == *p) {
|
||||
switch (*(p + 1)) {
|
||||
/* For quotes, just copy them over and
|
||||
double-increment p */
|
||||
case '\'': *q = *(p + 1); ++p; break;
|
||||
case '"': *q = *(p + 1); ++p; break;
|
||||
|
||||
/* For other normal escapes, insert the right code
|
||||
and double-increment p */
|
||||
case 'a': *q = '\a'; ++p; break;
|
||||
case 'b': *q = '\b'; ++p; break;
|
||||
case 'f': *q = '\f'; ++p; break;
|
||||
case 'n': *q = '\n'; ++p; break;
|
||||
case 'r': *q = '\r'; ++p; break;
|
||||
case 't': *q = '\t'; ++p; break;
|
||||
case 'v': *q = '\v'; ++p; break;
|
||||
|
||||
/* For un-terminated escape, just put in a \. Do
|
||||
*not* double increment p; it's the end of the
|
||||
string! */
|
||||
case '\0': *q = '\\'; break;
|
||||
|
||||
/* Otherwise, just copy and double increment */
|
||||
default: *q = *p; ++p; break;
|
||||
}
|
||||
++q;
|
||||
} else {
|
||||
/* Don't copy un-escaped quotes */
|
||||
if ('\'' != *p && '"' != *p) {
|
||||
*q = *p;
|
||||
++q;
|
||||
}
|
||||
}
|
||||
}
|
||||
*q = '\0';
|
||||
}
|
||||
|
||||
*argv_arg = argv;
|
||||
free(cmd);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Die nicely
|
||||
*/
|
||||
static void diediedie(int status)
|
||||
{
|
||||
/* We don't really have any way to report anything, so just close
|
||||
the pipe fd and die */
|
||||
close(mca_notifier_command_component.to_child[0]);
|
||||
close(mca_notifier_command_component.to_parent[1]);
|
||||
_exit(status);
|
||||
}
|
||||
|
||||
/*
|
||||
* Main entry point for stdin thread
|
||||
*/
|
||||
static void *stdin_main(opal_object_t *obj)
|
||||
{
|
||||
char *data;
|
||||
opal_thread_t *t = (opal_thread_t*) obj;
|
||||
stdin_arg_t *arg = (stdin_arg_t*) t->t_arg;
|
||||
|
||||
asprintf(&data, "<stdin>\n<notifier severity_int=\"%d\" severity_str=\"%s\" errcode=\"%d\">\n<message>%s</message>\n</notifier>\n</stdin>\n",
|
||||
arg->sat_severity,
|
||||
orte_notifier_base_sev2str((orte_notifier_base_severity_t)arg->sat_severity),
|
||||
arg->sat_errcode,
|
||||
arg->sat_msg);
|
||||
if (NULL != data) {
|
||||
orte_notifier_command_write_fd(arg->sat_pipe_fd,
|
||||
strlen(data) + 1, data);
|
||||
free(data);
|
||||
close(arg->sat_pipe_fd);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop over waiting for a child to die
|
||||
*/
|
||||
static int do_wait(pid_t pid, int timeout, int *status, bool *exited)
|
||||
{
|
||||
pid_t pid2;
|
||||
time_t t1, t2;
|
||||
|
||||
t2 = t1 = time(NULL);
|
||||
*exited = false;
|
||||
while (timeout <= 0 || t2 - t1 < timeout) {
|
||||
pid2 = waitpid(pid, status, WNOHANG);
|
||||
if (pid2 == pid) {
|
||||
*exited = true;
|
||||
return ORTE_SUCCESS;
|
||||
} else if (pid2 < 0 && EINTR != errno) {
|
||||
if (ECHILD == errno) {
|
||||
*exited = true;
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* What else can we do? */
|
||||
diediedie(10);
|
||||
}
|
||||
|
||||
/* Let the child run a bit */
|
||||
usleep(100);
|
||||
t2 = time(NULL);
|
||||
}
|
||||
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fork/exec a command from the parent
|
||||
*/
|
||||
static void do_exec(void)
|
||||
{
|
||||
pid_t pid;
|
||||
bool exited, killed;
|
||||
int sel[3], status;
|
||||
int pipe_to_stdin[2];
|
||||
char *msg, *p, *cmd, **argv = NULL;
|
||||
orte_notifier_command_component_t *c = &mca_notifier_command_component;
|
||||
opal_thread_t stdin_thread;
|
||||
stdin_arg_t arg;
|
||||
|
||||
/* First three items on the pipe are: severity, errcode, and
|
||||
string length (sel = Severity, Errcode, string Length. */
|
||||
if (ORTE_SUCCESS !=
|
||||
orte_notifier_command_read_fd(c->to_child[0], sizeof(sel), sel)) {
|
||||
diediedie(1);
|
||||
}
|
||||
|
||||
/* Malloc out enough space for the string to read */
|
||||
msg = malloc(sel[2] + 1);
|
||||
if (NULL == msg) {
|
||||
diediedie(2);
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS !=
|
||||
orte_notifier_command_read_fd(c->to_child[0], sel[2] + 1, msg)) {
|
||||
diediedie(3);
|
||||
/* What else can we do? */
|
||||
}
|
||||
|
||||
/* We have all the info. Now build up the string command to
|
||||
exec. Do the $<foo> replacements. */
|
||||
cmd = strdup(c->cmd);
|
||||
if ('\0' != *cmd) {
|
||||
char *temp;
|
||||
|
||||
while (NULL != (p = strstr(cmd, "$s"))) {
|
||||
*p = '\0';
|
||||
asprintf(&temp, "%s%d%s", cmd, sel[0], p + 2);
|
||||
free(cmd);
|
||||
cmd = temp;
|
||||
}
|
||||
|
||||
while (NULL != (p = strstr(cmd, "$S"))) {
|
||||
*p = '\0';
|
||||
asprintf(&temp, "%s%s%s", cmd,
|
||||
orte_notifier_base_sev2str((orte_notifier_base_severity_t)sel[0]), p + 2);
|
||||
free(cmd);
|
||||
cmd = temp;
|
||||
}
|
||||
|
||||
while (NULL != (p = strstr(cmd, "$e"))) {
|
||||
*p = '\0';
|
||||
asprintf(&temp, "%s%d%s", cmd, sel[1], p + 2);
|
||||
free(cmd);
|
||||
cmd = temp;
|
||||
}
|
||||
|
||||
while (NULL != (p = strstr(cmd, "$m"))) {
|
||||
*p = '\0';
|
||||
asprintf(&temp, "%s%s%s", cmd, msg, p + 2);
|
||||
free(cmd);
|
||||
cmd = temp;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now break it up into a list of argv */
|
||||
if (ORTE_SUCCESS != orte_notifier_command_split(cmd, &argv)) {
|
||||
diediedie(7);
|
||||
/* What else can we do? */
|
||||
}
|
||||
|
||||
/* Do we need a stdin pipe? */
|
||||
if (mca_notifier_command_component.pass_via_stdin) {
|
||||
if (0 != pipe(pipe_to_stdin)) {
|
||||
diediedie(8);
|
||||
}
|
||||
}
|
||||
|
||||
/* Fork off the child and run the command */
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
diediedie(8);
|
||||
} else if (pid == 0) {
|
||||
int i;
|
||||
int fdmax = sysconf(_SC_OPEN_MAX);
|
||||
close(0);
|
||||
for (i = 3; i < fdmax; ++i) {
|
||||
if (!mca_notifier_command_component.pass_via_stdin ||
|
||||
pipe_to_stdin[0] != i) {
|
||||
close(i);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we have a pipe to stdin, dup it */
|
||||
if (mca_notifier_command_component.pass_via_stdin) {
|
||||
close(pipe_to_stdin[1]);
|
||||
if (0 != pipe_to_stdin[0]) {
|
||||
if (dup2(pipe_to_stdin[0], 0) < 0) {
|
||||
diediedie(13);
|
||||
}
|
||||
close(pipe_to_stdin[0]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Run it! */
|
||||
execvp(argv[0], argv);
|
||||
/* If we get here, bad */
|
||||
diediedie(9);
|
||||
}
|
||||
|
||||
/* Write down stdin. Start a thread because this has to run in
|
||||
parallel to the timer to kill the grandchild if it runs too
|
||||
long. */
|
||||
if (mca_notifier_command_component.pass_via_stdin) {
|
||||
close(pipe_to_stdin[0]);
|
||||
OBJ_CONSTRUCT(&stdin_thread, opal_thread_t);
|
||||
stdin_thread.t_run = stdin_main;
|
||||
arg.sat_pipe_fd = pipe_to_stdin[1];
|
||||
arg.sat_severity = sel[0];
|
||||
arg.sat_errcode = sel[1];
|
||||
arg.sat_msg = msg;
|
||||
stdin_thread.t_arg = (void *) &arg;
|
||||
if (OPAL_SUCCESS != opal_thread_start(&stdin_thread)) {
|
||||
diediedie(9);
|
||||
}
|
||||
}
|
||||
|
||||
/* Parent: wait for / reap the child. */
|
||||
do_wait(pid, mca_notifier_command_component.timeout, &status, &exited);
|
||||
|
||||
/* If the child didn't die, try killing it nicely. If that fails, kill
|
||||
it dead. */
|
||||
killed = false;
|
||||
if (!exited) {
|
||||
killed = true;
|
||||
kill(pid, SIGTERM);
|
||||
do_wait(pid, mca_notifier_command_component.timeout, &status, &exited);
|
||||
if (!exited) {
|
||||
kill(pid, SIGKILL);
|
||||
do_wait(pid, mca_notifier_command_component.timeout, &status,
|
||||
&exited);
|
||||
}
|
||||
}
|
||||
|
||||
/* Wait for the thread to complete */
|
||||
if (mca_notifier_command_component.pass_via_stdin) {
|
||||
void *ret;
|
||||
|
||||
close(pipe_to_stdin[1]);
|
||||
opal_thread_join(&stdin_thread, &ret);
|
||||
OBJ_DESTRUCT(&stdin_thread);
|
||||
}
|
||||
|
||||
/* Free stuff */
|
||||
free(cmd);
|
||||
free(msg);
|
||||
opal_argv_free(argv);
|
||||
|
||||
/* Handshake back up to the parent: just send the status value
|
||||
back up to the parent and let all interpretation occur up
|
||||
there. */
|
||||
sel[0] = (int) exited;
|
||||
sel[1] = (int) killed;
|
||||
sel[2] = status;
|
||||
if (ORTE_SUCCESS !=
|
||||
orte_notifier_command_write_fd(mca_notifier_command_component.to_parent[1],
|
||||
sizeof(sel), sel)) {
|
||||
diediedie(11);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Main entry point for child
|
||||
*/
|
||||
void orte_notifier_command_child_main(void)
|
||||
{
|
||||
orte_notifier_command_pipe_cmd_t cmd;
|
||||
orte_notifier_command_component_t *c = &mca_notifier_command_component;
|
||||
|
||||
while (1) {
|
||||
/* Block waiting for a command */
|
||||
cmd = CMD_MAX;
|
||||
if (ORTE_SUCCESS !=
|
||||
orte_notifier_command_read_fd(c->to_child[0], sizeof(cmd), &cmd)) {
|
||||
diediedie(4);
|
||||
}
|
||||
|
||||
switch (cmd) {
|
||||
case CMD_EXEC:
|
||||
do_exec();
|
||||
break;
|
||||
|
||||
case CMD_TIME_TO_QUIT:
|
||||
diediedie(0);
|
||||
|
||||
default:
|
||||
diediedie(cmd + 50);
|
||||
}
|
||||
}
|
||||
}
|
@ -1,254 +0,0 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Simple command notifier
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_WAIT_H
|
||||
#include <sys/wait.h>
|
||||
#endif
|
||||
#include <errno.h>
|
||||
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "opal/util/argv.h"
|
||||
|
||||
#include "orte/constants.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include "notifier_command.h"
|
||||
|
||||
static int command_component_query(mca_base_module_t **module, int *priority);
|
||||
static int command_close(void);
|
||||
static int command_register(void);
|
||||
|
||||
|
||||
/*
|
||||
* Struct of function pointers that need to be initialized
|
||||
*/
|
||||
orte_notifier_command_component_t mca_notifier_command_component = {
|
||||
{
|
||||
{
|
||||
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
|
||||
|
||||
"command",
|
||||
|
||||
ORTE_MAJOR_VERSION,
|
||||
ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION,
|
||||
|
||||
NULL,
|
||||
command_close,
|
||||
command_component_query,
|
||||
command_register,
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
},
|
||||
|
||||
/* Command command to run */
|
||||
"/sbin/initlog -f $s -n \"Open MPI\" -s \"$S: $m (errorcode: $e)\"",
|
||||
|
||||
/* Timeout */
|
||||
30,
|
||||
|
||||
/* Priority */
|
||||
10,
|
||||
|
||||
/* PID of child */
|
||||
-1,
|
||||
|
||||
/* To-child pipe FDs */
|
||||
{ -1, -1 },
|
||||
|
||||
/* To-parent pipe FDs */
|
||||
{ -1, -1 },
|
||||
|
||||
/* Pass via stdin? */
|
||||
true,
|
||||
};
|
||||
|
||||
/* Safety to ensure we don't try to write down a dead pipe */
|
||||
static void child_death_cb(pid_t pid, int status, void *data)
|
||||
{
|
||||
if (pid == mca_notifier_command_component.child_pid) {
|
||||
OPAL_OUTPUT((0, "Command notifier: child unexpectedly died! Exited, %d, exitstatus %d", WIFEXITED(status), WEXITSTATUS(status)));
|
||||
mca_notifier_command_component.child_pid = 0;
|
||||
mca_notifier_command_component.to_child[1] = -1;
|
||||
}
|
||||
}
|
||||
|
||||
static int command_register(void)
|
||||
{
|
||||
int val;
|
||||
|
||||
mca_base_param_reg_string(&mca_notifier_command_component.super.base_version,
|
||||
"cmd",
|
||||
"Command to execute, with substitution. $s = integer severity; $S = string severity; $e = integer error code; $m = string message",
|
||||
false, false,
|
||||
mca_notifier_command_component.cmd,
|
||||
&mca_notifier_command_component.cmd);
|
||||
|
||||
mca_base_param_reg_int(&mca_notifier_command_component.super.base_version,
|
||||
"timeout",
|
||||
"Timeout (in seconds) of the command",
|
||||
false, false,
|
||||
mca_notifier_command_component.timeout,
|
||||
&mca_notifier_command_component.timeout);
|
||||
|
||||
mca_base_param_reg_int(&mca_notifier_command_component.super.base_version,
|
||||
"use_stdin",
|
||||
"If true, pass parameters to the command via stdin, formatted with trivial XML",
|
||||
false, false,
|
||||
(int) mca_notifier_command_component.pass_via_stdin,
|
||||
&val);
|
||||
mca_notifier_command_component.pass_via_stdin = OPAL_INT_TO_BOOL(val);
|
||||
|
||||
/* Priority */
|
||||
mca_base_param_reg_int(&mca_notifier_command_component.super.base_version,
|
||||
"priority",
|
||||
"Priority of this component",
|
||||
false, false,
|
||||
mca_notifier_command_component.priority,
|
||||
&mca_notifier_command_component.priority);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int command_close(void)
|
||||
{
|
||||
if (NULL != mca_notifier_command_component.cmd) {
|
||||
free(mca_notifier_command_component.cmd);
|
||||
}
|
||||
|
||||
/* Tell the child process to die */
|
||||
if (0 != mca_notifier_command_component.child_pid &&
|
||||
-1 != mca_notifier_command_component.to_child[1]) {
|
||||
orte_notifier_command_pipe_cmd_t cmd = CMD_TIME_TO_QUIT;
|
||||
orte_notifier_command_write_fd(mca_notifier_command_component.to_child[1],
|
||||
sizeof(cmd), &cmd);
|
||||
|
||||
close(mca_notifier_command_component.to_child[1]);
|
||||
mca_notifier_command_component.to_child[1] = -1;
|
||||
|
||||
close(mca_notifier_command_component.to_parent[0]);
|
||||
mca_notifier_command_component.to_parent[0] = -1;
|
||||
}
|
||||
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int command_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
char **argv = NULL;
|
||||
|
||||
*priority = 0;
|
||||
*module = NULL;
|
||||
|
||||
/* If there's no command, there's no love */
|
||||
if (NULL == mca_notifier_command_component.cmd ||
|
||||
'\0' == mca_notifier_command_component.cmd[0]) {
|
||||
orte_show_help("help-orte-notifier-command.txt",
|
||||
"command not specified", true);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* Attempt to parse the command into argv, just as a basic sanity
|
||||
check to ensure that it seems to be ok. */
|
||||
if (ORTE_SUCCESS !=
|
||||
orte_notifier_command_split(mca_notifier_command_component.cmd, &argv)) {
|
||||
orte_show_help("help-orte-notifier-command.txt",
|
||||
"bad command", true, orte_process_info.nodename,
|
||||
mca_notifier_command_component.cmd);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
opal_argv_free(argv);
|
||||
|
||||
/* Create the pipe to be used (it'll be closed in component
|
||||
close if we're not selected) */
|
||||
if (0 != pipe(mca_notifier_command_component.to_child) ||
|
||||
0 != pipe(mca_notifier_command_component.to_parent)) {
|
||||
int save = errno;
|
||||
orte_show_help("help-orte-notifier-command.txt",
|
||||
"system call fail", true, orte_process_info.nodename,
|
||||
"pipe", save, strerror(save));
|
||||
errno = save;
|
||||
return ORTE_ERR_IN_ERRNO;
|
||||
}
|
||||
|
||||
/* Create the child (it'll be shut down in component close if
|
||||
we're not selected). We create the child very early so that we
|
||||
do it before any MPI networks are initialized that have
|
||||
problems with fork(). The child sits on the other end of a
|
||||
pipe and waits for commands from this main process. Commands
|
||||
include telling the child to fork/exec a proces and shutting
|
||||
down. */
|
||||
mca_notifier_command_component.child_pid = fork();
|
||||
if (mca_notifier_command_component.child_pid < 0) {
|
||||
int save = errno;
|
||||
orte_show_help("help-orte-notifier-command.txt",
|
||||
"system call fail", true, orte_process_info.nodename,
|
||||
"fork", save, strerror(save));
|
||||
errno = save;
|
||||
return ORTE_ERR_IN_ERRNO;
|
||||
}
|
||||
|
||||
/* Child: close all fd's except the reading pipe and call the
|
||||
child main routine */
|
||||
if (0 == mca_notifier_command_component.child_pid) {
|
||||
int i;
|
||||
int fdmax = sysconf(_SC_OPEN_MAX);
|
||||
for (i = 3; i < fdmax; ++i) {
|
||||
if (i != mca_notifier_command_component.to_child[0] &&
|
||||
i != mca_notifier_command_component.to_parent[1]) {
|
||||
close(i);
|
||||
}
|
||||
}
|
||||
|
||||
orte_notifier_command_child_main();
|
||||
/* Never returns */
|
||||
}
|
||||
|
||||
/* Parent: close other ends of pipes */
|
||||
close(mca_notifier_command_component.to_child[0]);
|
||||
close(mca_notifier_command_component.to_parent[1]);
|
||||
|
||||
/* Let's find out if the child unexpectedly dies */
|
||||
orte_wait_cb(mca_notifier_command_component.child_pid, child_death_cb, 0);
|
||||
|
||||
*priority = 10;
|
||||
*module = (mca_base_module_t *) &orte_notifier_command_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
@ -1,82 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/**
|
||||
* Note: this file is a little fast-n-loose with OPAL_HAVE_THREADS --
|
||||
* it uses this value in run-time "if" conditionals (vs. compile-time
|
||||
* #if conditionals). We also don't protect including <pthread.h>.
|
||||
* That's because this component currently only compiles on Linux and
|
||||
* Solaris, and both of these OS's have pthreads. Using the run-time
|
||||
* conditionals gives us better compile-time checking, even of code
|
||||
* that isn't activated.
|
||||
*
|
||||
* Note, too, that the functionality in this file does *not* require
|
||||
* all the heavyweight OMPI thread infrastructure (e.g., from
|
||||
* --enable-mpi-thread-multiple or --enable-progress-threads). All work that
|
||||
* is done in a separate progress thread is very carefully segregated
|
||||
* from that of the main thread, and communication back to the main
|
||||
* thread
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "notifier_command.h"
|
||||
|
||||
|
||||
/*
|
||||
* Simple loop over reading from a fd
|
||||
*/
|
||||
int orte_notifier_command_read_fd(int fd, int len, void *buffer)
|
||||
{
|
||||
int rc;
|
||||
char *b = buffer;
|
||||
|
||||
while (len > 0) {
|
||||
rc = read(fd, b, len);
|
||||
if (rc < 0 && EAGAIN == errno) {
|
||||
continue;
|
||||
} else if (rc > 0) {
|
||||
len -= rc;
|
||||
b += rc;
|
||||
} else {
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Simple loop over writing to an fd
|
||||
*/
|
||||
int orte_notifier_command_write_fd(int fd, int len, void *buffer)
|
||||
{
|
||||
int rc;
|
||||
char *b = buffer;
|
||||
|
||||
while (len > 0) {
|
||||
rc = write(fd, b, len);
|
||||
if (rc < 0 && EAGAIN == errno) {
|
||||
continue;
|
||||
} else if (rc > 0) {
|
||||
len -= rc;
|
||||
b += rc;
|
||||
} else {
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
@ -1,196 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Send an email upon notifier events.
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <sys/wait.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifdef HAVE_STDARG_H
|
||||
#include <stdarg.h>
|
||||
#endif
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SIGNAL_H
|
||||
#include <signal.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_WAIT_H
|
||||
#include <sys/wait.h>
|
||||
#endif
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/error.h"
|
||||
|
||||
#include "orte/constants.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/util/error_strings.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
#include "notifier_command.h"
|
||||
|
||||
|
||||
static void command_log(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap);
|
||||
static void command_help(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename,
|
||||
const char *topic, va_list ap);
|
||||
static void command_peer(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc,
|
||||
const char *msg, va_list ap);
|
||||
|
||||
/* Module */
|
||||
orte_notifier_base_module_t orte_notifier_command_module = {
|
||||
NULL,
|
||||
NULL,
|
||||
command_log,
|
||||
command_help,
|
||||
command_peer,
|
||||
NULL
|
||||
};
|
||||
|
||||
/*
|
||||
* Back-end function to actually tell the child to fork the command
|
||||
*/
|
||||
static int send_command(orte_notifier_base_severity_t severity, int errcode,
|
||||
char *msg)
|
||||
{
|
||||
/* csel = Command, Severity, Errcode, string Length */
|
||||
int rc, csel[4];
|
||||
char *errmsg = NULL;
|
||||
|
||||
csel[0] = CMD_EXEC;
|
||||
csel[1] = severity;
|
||||
csel[2] = errcode;
|
||||
csel[3] = strlen(msg);
|
||||
|
||||
/* Write the severity, errcode, and string length */
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = orte_notifier_command_write_fd(mca_notifier_command_component.to_child[1],
|
||||
sizeof(csel), csel))) {
|
||||
errmsg = "write";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Now write the message itself */
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = orte_notifier_command_write_fd(mca_notifier_command_component.to_child[1],
|
||||
csel[3] + 1, msg))) {
|
||||
errmsg = "write";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Now read back the grandchild's exit status from the child:
|
||||
0 = 0/1 indicating whether the grandchild exited or not
|
||||
1 = 0/1 indicating whether the grandchild timed out/was killed or not
|
||||
2 = exit status returned by waitpid() (only relevant if exited==1) */
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = orte_notifier_command_read_fd(mca_notifier_command_component.to_parent[0],
|
||||
sizeof(int) * 3, csel))) {
|
||||
errmsg = "read";
|
||||
goto error;
|
||||
}
|
||||
/* Did the grandchild exit? */
|
||||
if (0 == csel[0]) {
|
||||
orte_show_help("help-orte-notifier-command.txt",
|
||||
"grandchild did not exit", true,
|
||||
orte_process_info.nodename,
|
||||
mca_notifier_command_component.cmd,
|
||||
mca_notifier_command_component.timeout);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
/* Did the grandchild timeout? */
|
||||
if (1 == csel[1]) {
|
||||
orte_show_help("help-orte-notifier-command.txt",
|
||||
"grandchild timeout", true,
|
||||
orte_process_info.nodename,
|
||||
mca_notifier_command_component.cmd,
|
||||
mca_notifier_command_component.timeout,
|
||||
WIFEXITED(csel[0]) ? "Exit status" : "Signal",
|
||||
WIFEXITED(csel[0]) ? WEXITSTATUS(csel[0]) : WTERMSIG(csel[0]));
|
||||
return ORTE_ERR_TIMEOUT;
|
||||
}
|
||||
|
||||
/* The grandchild exited in less than the timeout -- yay. Did it
|
||||
exit cleanly? */
|
||||
if (WIFEXITED(csel[1]) && 0 == WEXITSTATUS(csel[1])) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* Nope -- didn't exit cleanly, so print a warning. */
|
||||
orte_show_help("help-orte-notifier-command.txt",
|
||||
"grandchild fail", true, orte_process_info.nodename,
|
||||
mca_notifier_command_component.cmd,
|
||||
WIFEXITED(csel[0]) ? "Exit status" : "Signal",
|
||||
WIFEXITED(csel[0]) ? WEXITSTATUS(csel[0]) : WTERMSIG(csel[0]));
|
||||
return ORTE_ERROR;
|
||||
|
||||
error:
|
||||
orte_show_help("help-orte-notifier-command.txt",
|
||||
"system call fail", true, orte_process_info.nodename,
|
||||
errmsg, opal_strerror(rc), rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * Format a printf-style message and hand it to the child via
 * send_command().  Does nothing if there is no format string or the
 * message cannot be formatted.
 */
static void command_log(orte_notifier_base_severity_t severity, int errcode,
                        const char *msg, va_list ap)
{
    char *output = NULL;

    /* If there was a message, output it */
    if (NULL == msg) {
        return;
    }

    /* On failure vasprintf() returns -1 and leaves the output pointer
       undefined, so the return value is the only reliable check */
    if (vasprintf(&output, msg, ap) < 0 || NULL == output) {
        return;
    }

    send_command(severity, errcode, output);
    free(output);
}
|
||||
|
||||
/*
 * Render a help-file topic into a string with opal_show_help_vstring()
 * and hand it to the child via send_command().  Nothing is sent when
 * no string is produced.
 */
static void command_help(orte_notifier_base_severity_t severity, int errcode,
                         const char *filename,
                         const char *topic, va_list ap)
{
    char *rendered;

    rendered = opal_show_help_vstring(filename, topic, false, ap);
    if (NULL == rendered) {
        return;
    }

    send_command(severity, errcode, rendered);
    free(rendered);
}
|
||||
|
||||
static void command_peer(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap)
|
||||
{
|
||||
char *buf = orte_notifier_base_peer_log(errcode, peer_proc, msg, ap);
|
||||
|
||||
if (NULL != buf) {
|
||||
send_command(severity, errcode, buf);
|
||||
free(buf);
|
||||
}
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
dnl -*- shell-script -*-
|
||||
dnl
|
||||
dnl Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
dnl University of Stuttgart. All rights reserved.
|
||||
dnl Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
dnl Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
dnl
|
||||
dnl $HEADER$
|
||||
dnl
|
||||
|
||||
AC_DEFUN([MCA_orte_notifier_CONFIG],[
|
||||
ompi_show_subsubtitle "Pre-configuring the framework notifier"
|
||||
|
||||
AC_MSG_CHECKING([if --enable-notifier-log-event was specified])
|
||||
AC_ARG_ENABLE(notifier-log-event,
|
||||
AC_HELP_STRING([--enable-notifier-log-event],
|
||||
[Enable unusual events notification. (default: disabled)]))
|
||||
if test "$enable_notifier_log_event" = "yes"; then
|
||||
AC_MSG_RESULT([yes])
|
||||
WANT_NOTIFIER_LOG_EVENT=1
|
||||
else
|
||||
AC_MSG_RESULT([no (disabling "notifier-log-event")])
|
||||
WANT_NOTIFIER_LOG_EVENT=0
|
||||
fi
|
||||
AC_DEFINE_UNQUOTED([ORTE_WANT_NOTIFIER_LOG_EVENT],
|
||||
[$WANT_NOTIFIER_LOG_EVENT],
|
||||
[if the notifier_log_event should be enabled])
|
||||
AM_CONDITIONAL([ORTE_WANT_NOTIFIER_LOG_EVENT],
|
||||
[test "$WANT_NOTIFIER_LOG_EVENT" = "1"])
|
||||
|
||||
MCA_CONFIGURE_FRAMEWORK($1, $2, 1)
|
||||
])
|
@ -1,45 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
sources = \
|
||||
notifier_file.h \
|
||||
notifier_file_module.c \
|
||||
notifier_file_component.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_notifier_file_DSO
|
||||
component_noinst =
|
||||
component_install = mca_notifier_file.la
|
||||
else
|
||||
component_noinst = libmca_notifier_file.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_notifier_file_la_SOURCES = $(sources)
|
||||
mca_notifier_file_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_notifier_file_la_SOURCES =$(sources)
|
||||
libmca_notifier_file_la_LDFLAGS = -module -avoid-version
|
@ -1,19 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# MCA_notifier_file_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_notifier_file_CONFIG], [
|
||||
AC_CONFIG_FILES([orte/mca/notifier/file/Makefile])
|
||||
|
||||
AS_IF([test "$orte_without_full_support" = 0],
|
||||
[$1],
|
||||
[$2])
|
||||
])
|
@ -1,15 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English help file for Open MPI's file notifier support
|
||||
#
|
||||
[file name not specified]
|
||||
Error: the Open MPI file notifier component had no file name specified.
|
||||
#
|
@ -1,51 +0,0 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
#ifndef NOTIFIER_FILE_H
|
||||
#define NOTIFIER_FILE_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef struct {
|
||||
orte_notifier_base_component_t super;
|
||||
|
||||
/* File name the traces should be sent to */
|
||||
char *fname;
|
||||
|
||||
/* Priority */
|
||||
int priority;
|
||||
} orte_notifier_file_component_t;
|
||||
|
||||
|
||||
/*
|
||||
* Notifier interfaces
|
||||
*/
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_notifier_file_component_t mca_notifier_file_component;
|
||||
extern orte_notifier_base_module_t orte_notifier_file_module;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* NOTIFIER_FILE_H */
|
@ -1,115 +0,0 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file:
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* includes
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif /* HAVE_STRING_H */
|
||||
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "notifier_file.h"
|
||||
|
||||
|
||||
static int orte_notifier_file_register(void);
|
||||
static int orte_notifier_file_component_query(mca_base_module_t **, int *);
|
||||
static int orte_notifier_file_close(void);
|
||||
|
||||
/*
|
||||
* Struct of function pointers that need to be initialized
|
||||
*/
|
||||
orte_notifier_file_component_t mca_notifier_file_component = {
|
||||
{
|
||||
{
|
||||
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
|
||||
|
||||
"file", /* MCA module name */
|
||||
ORTE_MAJOR_VERSION, /* MCA module major version */
|
||||
ORTE_MINOR_VERSION, /* MCA module minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA module release version */
|
||||
NULL,
|
||||
orte_notifier_file_close,
|
||||
orte_notifier_file_component_query,
|
||||
orte_notifier_file_register,
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
},
|
||||
|
||||
/* Log file name */
|
||||
"wdc",
|
||||
|
||||
/* Priority */
|
||||
10
|
||||
};
|
||||
|
||||
static int orte_notifier_file_register(void)
|
||||
{
|
||||
mca_base_component_t *mcb = &mca_notifier_file_component.super.base_version;
|
||||
orte_notifier_file_component_t *nfc = &mca_notifier_file_component;
|
||||
|
||||
mca_base_param_reg_string(mcb, "name",
|
||||
"File name the traces should be redirected to",
|
||||
false, false, nfc->fname, &nfc->fname);
|
||||
mca_base_param_reg_int(mcb, "priority",
|
||||
"Priority of the file notifier component",
|
||||
false, false, nfc->priority, &nfc->priority);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int orte_notifier_file_close(void)
|
||||
{
|
||||
if (NULL != mca_notifier_file_component.fname) {
|
||||
free(mca_notifier_file_component.fname);
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Component query: report this component's priority and module.
 *
 * Both output arguments are filled in before the file name is checked.
 * If no file name is configured (NULL or empty), a help message is
 * shown and ORTE_ERR_BAD_PARAM is returned; otherwise ORTE_SUCCESS.
 */
static int orte_notifier_file_component_query(mca_base_module_t **module,
                                              int *priority)
{
    const char *name = mca_notifier_file_component.fname;

    *priority = mca_notifier_file_component.priority;
    *module = (mca_base_module_t *)&orte_notifier_file_module;

    if (NULL != name && 0 != strlen(name)) {
        return ORTE_SUCCESS;
    }

    orte_show_help("help-orte-notifier-file.txt",
                   "file name not specified", true);
    return ORTE_ERR_BAD_PARAM;
}
|
||||
|
@ -1,237 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#ifdef HAVE_STDIO_H
|
||||
#include <stdio.h>
|
||||
#endif /* HAVE_STDIO_H */
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif /* HAVE_STRING_H */
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#endif /* HAVE_FCNTL_H */
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "notifier_file.h"
|
||||
|
||||
|
||||
/* Static API's */
|
||||
static int init(void);
|
||||
static void finalize(void);
|
||||
static int open_file(void);
|
||||
static void file_log(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap);
|
||||
static void file_helplog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename, const char *topic, va_list ap);
|
||||
static void file_peerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap);
|
||||
static void file_eventlog(const char *msg);
|
||||
|
||||
/* Module def */
|
||||
orte_notifier_base_module_t orte_notifier_file_module = {
|
||||
init,
|
||||
finalize,
|
||||
file_log,
|
||||
file_helplog,
|
||||
file_peerlog,
|
||||
file_eventlog
|
||||
};
|
||||
|
||||
static int mylogfd = -1;
|
||||
|
||||
static int init(void)
|
||||
{
|
||||
orte_notifier_file_component_t *comp = &mca_notifier_file_component;
|
||||
|
||||
if (!strcmp(comp->fname, "stdout")) {
|
||||
mylogfd = fileno(stdout);
|
||||
} else if (!strcmp(comp->fname, "stderr")) {
|
||||
mylogfd = fileno(stderr);
|
||||
}
|
||||
/* Don't open in the case of a plain file: wait for the 1st write */
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void finalize(void)
|
||||
{
|
||||
if (-1 == mylogfd || fileno(stderr) == mylogfd
|
||||
|| fileno(stdout) == mylogfd) {
|
||||
return;
|
||||
}
|
||||
close(mylogfd);
|
||||
}
|
||||
|
||||
static int open_file(void)
|
||||
{
|
||||
orte_notifier_file_component_t *comp = &mca_notifier_file_component;
|
||||
char *full_name = NULL;
|
||||
char *fname = NULL;
|
||||
int rc = ORTE_SUCCESS;
|
||||
|
||||
if (-1 != mylogfd) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
asprintf(&fname, "output-%s", comp->fname);
|
||||
if (NULL == fname) {
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
full_name = opal_os_path(false, orte_process_info.job_session_dir,
|
||||
fname, NULL);
|
||||
if (NULL == full_name) {
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
mylogfd = open(full_name, O_CREAT | O_RDWR | O_APPEND, S_IRWXU);
|
||||
if (-1 == mylogfd) {
|
||||
rc = ORTE_ERR_FILE_OPEN_FAILURE;
|
||||
}
|
||||
|
||||
free(full_name);
|
||||
out_err:
|
||||
free(fname);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * Format a printf-style message, append a newline, and write it to the
 * log descriptor, opening the log file first if necessary.  Logging is
 * best effort: on any formatting or open failure the message is
 * silently dropped.
 */
static void file_log(orte_notifier_base_severity_t severity, int errcode,
                     const char *msg, va_list ap)
{
    char *output = NULL;
    char *tmp = NULL;
    int len;

    /* Add a newline at the end of the format string */
    if (asprintf(&tmp, "%s\n", msg) < 0 || NULL == tmp) {
        return;
    }

    /* vasprintf() leaves the pointer undefined on failure, so its
       return value is the only reliable check */
    len = vasprintf(&output, tmp, ap);
    free(tmp);
    if (len < 0 || NULL == output) {
        return;
    }

    /* If not done yet, open the log file */
    if (-1 == mylogfd) {
        if (ORTE_SUCCESS != open_file()) {
            free(output);
            return;
        }
    }

    write(mylogfd, output, strlen(output));
    fflush(NULL);
    free(output);
}
|
||||
|
||||
/*
 * Render a help-file topic into a string with opal_show_help_vstring()
 * and write it to the log descriptor, opening the log file first if
 * necessary.  The message is dropped if it cannot be rendered or the
 * file cannot be opened.
 */
static void file_helplog(orte_notifier_base_severity_t severity, int errcode,
                         const char *filename, const char *topic, va_list ap)
{
    char *text;

    text = opal_show_help_vstring(filename, topic, false, ap);
    if (NULL == text) {
        return;
    }

    /* Lazily open the log file on first use */
    if (-1 == mylogfd && ORTE_SUCCESS != open_file()) {
        free(text);
        return;
    }

    write(mylogfd, text, strlen(text));
    fflush(NULL);
    free(text);
}
|
||||
|
||||
static void file_peerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap)
|
||||
{
|
||||
char *buf;
|
||||
char *tmp = NULL;
|
||||
|
||||
/* Add a newline at the end of the format string */
|
||||
asprintf(&tmp, "%s\n", msg);
|
||||
if (NULL == tmp) {
|
||||
return;
|
||||
}
|
||||
|
||||
buf = orte_notifier_base_peer_log(errcode, peer_proc, tmp, ap);
|
||||
free(tmp);
|
||||
|
||||
if (NULL == buf) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* If not done yet, open the log file */
|
||||
if (-1 == mylogfd) {
|
||||
if (ORTE_SUCCESS != open_file()) {
|
||||
free(buf);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
write(mylogfd, buf, strlen(buf));
|
||||
fflush(NULL);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
static void file_eventlog(const char *msg)
|
||||
{
|
||||
char *tmp = NULL;
|
||||
|
||||
/* Add a newline at the end of the string */
|
||||
asprintf(&tmp, "%s\n", msg);
|
||||
if (NULL == tmp) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* If not done yet, open the log file */
|
||||
if (-1 == mylogfd) {
|
||||
if (ORTE_SUCCESS != open_file()) {
|
||||
free(tmp);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
write(mylogfd, tmp, strlen(tmp));
|
||||
fflush(NULL);
|
||||
free(tmp);
|
||||
}
|
@ -1,52 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CPPFLAGS = $(notifier_ftb_CPPFLAGS)
|
||||
|
||||
dist_pkgdata_DATA = \
|
||||
help-orte-notifier-ftb.txt \
|
||||
help-ftb-event-schema.txt
|
||||
|
||||
sources = \
|
||||
notifier_ftb.h \
|
||||
notifier_ftb_module.c \
|
||||
notifier_ftb_component.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_notifier_ftb_DSO
|
||||
component_noinst =
|
||||
component_install = mca_notifier_ftb.la
|
||||
else
|
||||
component_noinst = libmca_notifier_ftb.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_notifier_ftb_la_SOURCES = $(sources)
|
||||
mca_notifier_ftb_la_LDFLAGS = -module -avoid-version $(notifier_ftb_LDFLAGS)
|
||||
mca_notifier_ftb_la_LIBADD = $(notifier_ftb_LIBS)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_notifier_ftb_la_SOURCES =$(sources)
|
||||
libmca_notifier_ftb_la_LDFLAGS = -module -avoid-version $(notifier_ftb_LDFLAGS)
|
||||
libmca_notifier_ftb_la_LIBADD = $(notifier_ftb_LIBS)
|
@ -1,76 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2009 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2007 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
dnl Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# ORTE_CHECK_FTB(prefix, [action-if-found], [action-if-not-found])
|
||||
# --------------------------------------------------------
|
||||
# check if FTB (Fault Tolerance Backplane) support can be found. sets prefix_{CPPFLAGS,
|
||||
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
|
||||
# support, otherwise executes action-if-not-found
|
||||
AC_DEFUN([ORTE_CHECK_FTB],[
|
||||
AC_ARG_WITH([ftb],
|
||||
[AC_HELP_STRING([--with-ftb(=DIR)],
|
||||
[Build FTB (Fault Tolerance Backplane) support, searching for libraries in DIR])])
|
||||
AC_ARG_WITH([ftb-libdir],
|
||||
[AC_HELP_STRING([--with-ftb-libdir=DIR],
|
||||
[Search for FTB (Fault Tolerance Backplane) libraries in DIR])])
|
||||
|
||||
AS_IF([test "$with_ftb" != "no"],
|
||||
[AS_IF([test ! -z "$with_ftb" -a "$with_ftb" != "yes"],
|
||||
[ompi_check_ftb_dir="$with_ftb"])
|
||||
AS_IF([test ! -z "$with_ftb_libdir" -a "$with_ftb_libdir" != "yes"],
|
||||
[ompi_check_ftb_libdir="$with_ftb_libdir"])
|
||||
|
||||
OMPI_CHECK_PACKAGE([$1],
|
||||
[libftb.h],
|
||||
[ftb],
|
||||
[FTB_Connect],
|
||||
[],
|
||||
[$ompi_check_ftb_dir],
|
||||
[$ompi_check_ftb_libdir],
|
||||
[ompi_check_ftb_happy="yes"],
|
||||
[ompi_check_ftb_happy="no"])
|
||||
],
|
||||
[ompi_check_ftb_happy="no"])
|
||||
|
||||
AS_IF([test "$ompi_check_ftb_happy" = "yes"],
|
||||
[$2],
|
||||
[AS_IF([test ! -z "$with_ftb" -a "$with_ftb" != "no"],
|
||||
[AC_MSG_ERROR([FTB (Fault Tolerance Backplane) support requested but not found. Aborting])])
|
||||
$3])
|
||||
])
|
||||
|
||||
|
||||
# MCA_notifier_ftb_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_notifier_ftb_CONFIG], [
|
||||
AC_CONFIG_FILES([orte/mca/notifier/ftb/Makefile])
|
||||
|
||||
ORTE_CHECK_FTB([notifier_ftb],
|
||||
[notifier_ftb_happy="yes"],
|
||||
[notifier_ftb_happy="no"])
|
||||
|
||||
AS_IF([test "$notifier_ftb_happy" = "yes" -a "$orte_without_full_support" = 0],
|
||||
[notifier_ftb_WRAPPER_EXTRA_LDFLAGS="$notifier_ftb_LDFLAGS"
|
||||
notifier_ftb_WRAPPER_EXTRA_LIBS="$notifier_ftb_LIBS"
|
||||
$1],
|
||||
[$2])
|
||||
|
||||
# substitute in the things needed to build ftb
|
||||
AC_SUBST([notifier_ftb_CFLAGS])
|
||||
AC_SUBST([notifier_ftb_CPPFLAGS])
|
||||
AC_SUBST([notifier_ftb_LDFLAGS])
|
||||
AC_SUBST([notifier_ftb_LIBS])
|
||||
])dnl
|
@ -1,29 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2010 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology Corporation.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the FTB event schema file for Open MPI's FTB notifier
|
||||
#
|
||||
start
|
||||
|
||||
ftb.mpi.openmpi
|
||||
|
||||
FTB_MPI_PROCS_DEAD ERROR
|
||||
FTB_MPI_PROCS_UNREACHABLE ERROR
|
||||
FTB_MPI_PROCS_COMM_ERROR WARNING
|
||||
FTB_MPI_PROCS_MIGRATED INFO
|
||||
FTB_MPI_PROCS_MIGRATE_FAIL ERROR
|
||||
FTB_MPI_PROCS_CKPTED INFO
|
||||
FTB_MPI_PROCS_CKPT_FAIL ERROR
|
||||
FTB_MPI_PROCS_RESTARTED INFO
|
||||
FTB_MPI_PROCS_RESTART_FAIL ERROR
|
||||
FTB_MPI_PROCS_ABORTED ERROR
|
||||
|
||||
end
|
@ -1,45 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2009 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology Corporation.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English help file for Open MPI's FTB notifier support
|
||||
#
|
||||
[ftb connect failed]
|
||||
Open MPI's FTB notifier component failed to connect to the FTB server.
|
||||
Check if the FTB bootstrap server is running or not. For further help,
|
||||
refer to the FTB documentation (Section 4.0: RUNNING FTB).
|
||||
|
||||
Reason: %s (errno: %d)
|
||||
#
|
||||
[declare events failed]
|
||||
The Open MPI FTB notifier component failed to declare publishable events
|
||||
to the FTB.
|
||||
|
||||
Reason: %s (errno: %d)
|
||||
#
|
||||
[publish failed]
|
||||
Sorry, Open MPI's FTB component failed to publish the following event to
|
||||
the FTB.
|
||||
|
||||
Reason: %s (errno: %d)
|
||||
Event info: [%s] %s
|
||||
Event properties: %s (errno: %d)
|
||||
#
|
||||
[invalid subscription style]
|
||||
Error: the Open MPI FTB component tried to register with an invalid
|
||||
FTB client subscription style.
|
||||
|
||||
Subscription style: %s
|
||||
#
|
||||
[invalid value]
|
||||
Error: the Open MPI FTB notifier component tried to register with an
|
||||
invalid value in the FTB client information.
|
||||
#
|
@ -1,61 +0,0 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
#ifndef NOTIFIER_FTB_H
|
||||
#define NOTIFIER_FTB_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
#include "libftb.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef struct {
|
||||
orte_notifier_base_component_t super;
|
||||
|
||||
/* FTB client subscription style */
|
||||
char *subscription_style;
|
||||
|
||||
/* Priority of this component */
|
||||
int priority;
|
||||
} orte_notifier_ftb_component_t;
|
||||
|
||||
/* Notifier interfaces */
|
||||
ORTE_MODULE_DECLSPEC extern orte_notifier_ftb_component_t mca_notifier_ftb_component;
|
||||
extern orte_notifier_base_module_t orte_notifier_ftb_module;
|
||||
|
||||
/* FTB client information */
|
||||
extern FTB_client_t ftb_client_info;
|
||||
extern FTB_client_handle_t ftb_client_handle;
|
||||
|
||||
/* FTB event types */
|
||||
/* Event-type codes stored in the event properties before publishing */
typedef enum {
    FTB_EVENT_NORMAL   = 1,
    FTB_EVENT_RESPONSE = 2
} ftb_event_type_t;
|
||||
|
||||
/* Stringifies its argument: FTB_EVENT(X) expands to the string literal "X" */
|
||||
#define FTB_EVENT(errnum) #errnum
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
@ -1,160 +0,0 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file:
|
||||
*
|
||||
* This component proxies notification events to the Fault Tolerant
|
||||
* Backplane (See http://www.mcs.anl.gov/research/cifts/).
|
||||
* The ORTE notifier severity is translated to the corresponding
|
||||
* FTB severity before the event is published to the FTB.
|
||||
*/
|
||||
|
||||
/*
|
||||
* includes
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "notifier_ftb.h"
|
||||
|
||||
static int orte_notifier_ftb_close(void);
|
||||
static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority);
|
||||
static int orte_notifier_ftb_register(void);
|
||||
|
||||
/*
|
||||
* Struct of function pointers that need to be initialized
|
||||
*/
|
||||
orte_notifier_ftb_component_t mca_notifier_ftb_component = {
|
||||
{
|
||||
{
|
||||
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
|
||||
|
||||
"ftb", /* MCA module name */
|
||||
ORTE_MAJOR_VERSION, /* MCA module major version */
|
||||
ORTE_MINOR_VERSION, /* MCA module minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA module release version */
|
||||
|
||||
NULL,
|
||||
orte_notifier_ftb_close, /* module close */
|
||||
orte_notifier_ftb_component_query, /* module query */
|
||||
orte_notifier_ftb_register, /* module register */
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
},
|
||||
|
||||
/* FTB client subscription style */
|
||||
"FTB_SUBSCRIPTION_NONE",
|
||||
|
||||
/* Priority */
|
||||
10,
|
||||
};
|
||||
|
||||
static int orte_notifier_ftb_close(void)
|
||||
{
|
||||
|
||||
if (NULL != mca_notifier_ftb_component.subscription_style) {
|
||||
free(mca_notifier_ftb_component.subscription_style);
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Component query hook: fills in the FTB client description, connects
 * to the FTB and, on success, offers the FTB notifier module.
 *
 * module    [out] set to the FTB notifier module on success, NULL otherwise
 * priority  [out] set to the component's configured priority on success,
 *                 0 otherwise
 *
 * Returns ORTE_SUCCESS when the connection was established and
 * ORTE_ERR_NOT_FOUND (after showing a help message) when it was not.
 */
static int orte_notifier_ftb_component_query(mca_base_module_t **module,
                                             int *priority)
{
    int ret;
    const char *style = mca_notifier_ftb_component.subscription_style;

    *priority = 0;
    *module = NULL;

    /* Fill the FTB client information structure.  Every string below is
       written with snprintf so that it is bounded by the destination and
       always NUL-terminated.
       NOTE(review): sizeof assumes the FTB_client_t string members are
       fixed-size char arrays (they are written in place, never
       allocated) -- confirm against libftb.h. */
    memset(&ftb_client_info, 0, sizeof(ftb_client_info));
    snprintf(ftb_client_info.event_space,
             sizeof(ftb_client_info.event_space),
             "%s", "ftb.mpi.openmpi");

    /* The client name is "ompi<PID>" and the client job id is the ORTE
       jobid of this process, both printed as unsigned decimals. */
    snprintf(ftb_client_info.client_name,
             sizeof(ftb_client_info.client_name),
             "ompi%u", (unsigned int) orte_process_info.pid);
    snprintf(ftb_client_info.client_jobid,
             sizeof(ftb_client_info.client_jobid),
             "%u", (unsigned int) ORTE_PROC_MY_NAME->jobid);

    /* A missing subscription style leaves the (zeroed) field empty
       instead of dereferencing a NULL pointer. */
    if (NULL != style) {
        snprintf(ftb_client_info.client_subscription_style,
                 sizeof(ftb_client_info.client_subscription_style),
                 "%s", style);
    }

    /* Try to connect to the FTB backplane; the component is not
       selectable if the connection cannot be established. */
    if (FTB_SUCCESS != (ret = FTB_Connect(&ftb_client_info, &ftb_client_handle))) {
        switch (ret) {
        case FTB_ERR_SUBSCRIPTION_STYLE:
            orte_show_help("help-orte-notifier-ftb.txt",
                           "invalid subscription style",
                           true, ftb_client_info.client_subscription_style);
            break;

        case FTB_ERR_INVALID_VALUE:
            orte_show_help("help-orte-notifier-ftb.txt",
                           "invalid value",
                           true);
            break;

        default:
            /* This help topic formats a reason string and an error
               number, so both must be supplied. */
            orte_show_help("help-orte-notifier-ftb.txt",
                           "ftb connect failed",
                           true, "FTB_Connect() failed", ret);
            break;
        }

        return ORTE_ERR_NOT_FOUND;
    }

    /* Honor the registered "priority" parameter (its default is 10) */
    *priority = mca_notifier_ftb_component.priority;
    *module = (mca_base_module_t *)&orte_notifier_ftb_module;

    return ORTE_SUCCESS;
}
|
||||
|
||||
static int orte_notifier_ftb_register(void)
|
||||
{
|
||||
|
||||
/* FTB client subscription style */
|
||||
mca_base_param_reg_string(&mca_notifier_ftb_component.super.base_version,
|
||||
"subscription_style",
|
||||
"FTB client subscription style. "
|
||||
"Possible values are none, polling, notify and both (polling and notify).",
|
||||
false, false,
|
||||
mca_notifier_ftb_component.subscription_style,
|
||||
&mca_notifier_ftb_component.subscription_style);
|
||||
|
||||
/* Priority */
|
||||
mca_base_param_reg_int(&mca_notifier_ftb_component.super.base_version,
|
||||
"priority",
|
||||
"Priority of this component",
|
||||
false, false,
|
||||
mca_notifier_ftb_component.priority,
|
||||
&mca_notifier_ftb_component.priority);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
@ -1,294 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif /* HAVE_SYS_TIME_H */
|
||||
#ifdef HAVE_STDARG_H
|
||||
#include <stdarg.h>
|
||||
#endif
|
||||
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/os_path.h"
|
||||
|
||||
#include "orte/mca/plm/base/plm_private.h"
|
||||
#include "orte/mca/plm/plm.h"
|
||||
#include "orte/mca/sensor/sensor.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/snapc/snapc.h"
|
||||
#include "orte/mca/snapc/base/base.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/errmgr/base/base.h"
|
||||
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "notifier_ftb.h"
|
||||
|
||||
/* Static API's */
|
||||
static int init(void);
|
||||
static void finalize(void);
|
||||
static void ftb_log(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap);
|
||||
static void ftb_help(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename, const char *topic, va_list ap);
|
||||
static void ftb_peer(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap);
|
||||
|
||||
/* Module def */
|
||||
orte_notifier_base_module_t orte_notifier_ftb_module = {
|
||||
init,
|
||||
finalize,
|
||||
ftb_log,
|
||||
ftb_help,
|
||||
ftb_peer,
|
||||
NULL
|
||||
};
|
||||
|
||||
/* FTB client information */
|
||||
FTB_client_t ftb_client_info;
|
||||
|
||||
/* FTB client handle */
|
||||
FTB_client_handle_t ftb_client_handle;
|
||||
|
||||
static int init(void) {
|
||||
int ret;
|
||||
char *schema_file;
|
||||
|
||||
/* Locate the FTB events schema file */
|
||||
if (NULL == (schema_file = opal_os_path(false, opal_install_dirs.pkgdatadir,
|
||||
"help-ftb-event-schema.txt", NULL))) {
|
||||
schema_file = strdup("help-ftb-event-schema.txt");
|
||||
}
|
||||
|
||||
/* Declare the Open MPI publishable events to the FTB */
|
||||
ret = FTB_Declare_publishable_events(ftb_client_handle, schema_file, NULL, 0);
|
||||
free(schema_file);
|
||||
|
||||
if (FTB_SUCCESS != ret) {
|
||||
orte_show_help("help-orte-notifier-ftb.txt", "declare events failed", true,
|
||||
"FTB_Declare_publishable_events() failed", ret);
|
||||
|
||||
FTB_Disconnect(ftb_client_handle);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void finalize(void) {
|
||||
/* If the FTB client handle is valid, disconnect the client from FTB. */
|
||||
if (1 == ftb_client_handle.valid) {
|
||||
FTB_Disconnect(ftb_client_handle);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Maps an ORTE notifier severity to the name of the corresponding FTB
 * severity.  Severities not listed below map to "UNKNOWN".  The returned
 * string is a literal and must not be freed.
 */
static const char* get_ftb_event_severity(orte_notifier_base_severity_t severity)
{
    if (ORTE_NOTIFIER_EMERG == severity || ORTE_NOTIFIER_ALERT == severity) {
        return "ALL";
    }
    if (ORTE_NOTIFIER_CRIT == severity) {
        return "FATAL";
    }
    if (ORTE_NOTIFIER_ERROR == severity) {
        return "ERROR";
    }
    if (ORTE_NOTIFIER_WARN == severity || ORTE_NOTIFIER_NOTICE == severity) {
        return "WARNING";
    }
    if (ORTE_NOTIFIER_INFO == severity || ORTE_NOTIFIER_DEBUG == severity) {
        return "INFO";
    }

    return "UNKNOWN";
}
|
||||
|
||||
static const char* get_ftb_event_name(int errnum)
|
||||
{
|
||||
/* Handle checkpoint/restart and migration events */
|
||||
if ( CHECK_ORTE_SNAPC_CKPT_STATE(errnum) ) {
|
||||
errnum = ORTE_SNAPC_CKPT_STATE(errnum);
|
||||
switch (errnum) {
|
||||
case ORTE_SNAPC_CKPT_STATE_ESTABLISHED:
|
||||
return FTB_EVENT(FTB_MPI_PROCS_CKPTED);
|
||||
|
||||
case ORTE_SNAPC_CKPT_STATE_NO_CKPT:
|
||||
case ORTE_SNAPC_CKPT_STATE_ERROR:
|
||||
return FTB_EVENT(FTB_MPI_PROCS_CKPT_FAIL);
|
||||
|
||||
/* Restart events */
|
||||
case ORTE_SNAPC_CKPT_STATE_RECOVERED:
|
||||
return FTB_EVENT(FTB_MPI_PROCS_RESTARTED);
|
||||
|
||||
case ORTE_SNAPC_CKPT_STATE_NO_RESTART:
|
||||
return FTB_EVENT(FTB_MPI_PROCS_RESTART_FAIL);
|
||||
|
||||
/* Process migration events */
|
||||
case ORTE_ERRMGR_MIGRATE_STATE_FINISH:
|
||||
return FTB_EVENT(FTB_MPI_PROCS_MIGRATED);
|
||||
|
||||
case ORTE_ERRMGR_MIGRATE_STATE_ERROR:
|
||||
case ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS:
|
||||
return FTB_EVENT(FTB_MPI_PROCS_MIGRATE_FAIL);
|
||||
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
/* Handle process and communication failure events */
|
||||
switch (errnum) {
|
||||
case ORTE_ERR_CONNECTION_REFUSED:
|
||||
case ORTE_ERR_CONNECTION_FAILED:
|
||||
case ORTE_ERR_UNREACH:
|
||||
case ORTE_PROC_STATE_HEARTBEAT_FAILED:
|
||||
return FTB_EVENT(FTB_MPI_PROCS_UNREACHABLE);
|
||||
|
||||
case ORTE_ERR_COMM_FAILURE:
|
||||
case ORTE_PROC_STATE_COMM_FAILED:
|
||||
return FTB_EVENT(FTB_MPI_PROCS_COMM_ERROR);
|
||||
|
||||
case ORTE_PROC_STATE_FAILED_TO_START:
|
||||
case ORTE_PROC_STATE_CALLED_ABORT:
|
||||
return FTB_EVENT(FTB_MPI_PROCS_ABORTED);
|
||||
|
||||
case ORTE_PROC_STATE_ABORTED:
|
||||
case ORTE_PROC_STATE_ABORTED_BY_SIG:
|
||||
case ORTE_PROC_STATE_TERM_WO_SYNC:
|
||||
case ORTE_PROC_STATE_TERMINATED:
|
||||
case ORTE_PROC_STATE_KILLED_BY_CMD:
|
||||
return FTB_EVENT(FTB_MPI_PROCS_DEAD);
|
||||
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Extracts the FTB payload from a notifier message into dest.
 *
 * The payload is the text from the first '[' through the last ']' of
 * src, brackets included: "<FTB message [payload]>" yields "[payload]".
 * When src has no such bracket pair (either bracket is missing, or the
 * last ']' comes before the first '['), the whole of src is the payload.
 *
 * dest  destination buffer of at least size bytes
 * src   NUL-terminated notifier message (NULL yields an empty payload)
 * size  capacity of dest in bytes, including the terminating NUL
 *
 * The payload is truncated to size - 1 bytes if necessary and dest is
 * always NUL-terminated when size is non-zero.
 *
 * Returns the number of payload bytes stored in dest, not counting the
 * terminating NUL.
 */
static unsigned int extract_payload(char *dest, char *src, unsigned int size)
{
    const char *lbrace, *rbrace;
    const char *start;
    size_t len;

    if (NULL == dest || 0 == size) {
        return 0;
    }
    dest[0] = '\0';
    if (NULL == src) {
        return 0;
    }

    lbrace = strchr(src, '[');
    rbrace = strrchr(src, ']');

    if (NULL != lbrace && NULL != rbrace && lbrace < rbrace) {
        start = lbrace;
        len = (size_t) (rbrace - lbrace) + 1;
    } else {
        start = src;
        len = strlen(src);
    }

    /* Keep one byte for the terminator */
    if (len > (size_t) size - 1) {
        len = (size_t) size - 1;
    }
    memcpy(dest, start, len);
    dest[len] = '\0';

    return (unsigned int) len;
}
|
||||
|
||||
static void publish_ftb_event(orte_notifier_base_severity_t severity, int errcode,
|
||||
FTB_event_properties_t *eprop)
|
||||
{
|
||||
int ret;
|
||||
const char *event_name;
|
||||
FTB_event_handle_t ehandle;
|
||||
|
||||
/* Publish the event to the Fault Tolerant Backplane */
|
||||
event_name = get_ftb_event_name(errcode);
|
||||
if (NULL != event_name) {
|
||||
ret = FTB_Publish(ftb_client_handle, event_name, eprop, &ehandle);
|
||||
if (FTB_SUCCESS != ret) {
|
||||
orte_show_help("help-orte-notifier-ftb.txt", "publish failed", true,
|
||||
"FTB_Publish() failed", ret, get_ftb_event_severity(severity),
|
||||
event_name, eprop->event_payload, errcode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Notifier "log" entry point: formats msg with ap, extracts the FTB
 * payload from the result and publishes it as a normal FTB event.
 *
 * Nothing is published when msg is NULL or the message cannot be
 * formatted.
 */
static void ftb_log(orte_notifier_base_severity_t severity, int errcode, const char *msg,
                    va_list ap)
{
    char *payload = NULL;
    FTB_event_properties_t ev_prop;

    if (NULL == msg) {
        return;
    }

    /* Start from a fully zeroed property block.
       Only normal FTB events are supported currently. */
    memset(&ev_prop, 0, sizeof(ev_prop));
    ev_prop.event_type = (int) FTB_EVENT_NORMAL;

    /* The result pointer is only meaningful when vasprintf succeeds, so
       its return value has to be checked as well. */
    if (vasprintf(&payload, msg, ap) < 0 || NULL == payload) {
        return;
    }

    extract_payload(ev_prop.event_payload, payload, FTB_MAX_PAYLOAD_DATA);
    free(payload);
    publish_ftb_event(severity, errcode, &ev_prop);
}
|
||||
|
||||
/*
 * Notifier "help" entry point: renders the help topic to a string,
 * extracts the FTB payload from it and publishes it as a normal FTB
 * event.  Nothing is published when the help text cannot be rendered.
 */
static void ftb_help(orte_notifier_base_severity_t severity, int errcode,
                     const char *filename, const char *topic, va_list ap)
{
    FTB_event_properties_t ev_prop;
    char *text;

    /* Only normal FTB events are supported currently. */
    ev_prop.event_type = (int) FTB_EVENT_NORMAL;

    text = opal_show_help_vstring(filename, topic, false, ap);
    if (NULL == text) {
        return;
    }

    extract_payload(ev_prop.event_payload, text, FTB_MAX_PAYLOAD_DATA);
    free(text);
    publish_ftb_event(severity, errcode, &ev_prop);
}
|
||||
|
||||
static void ftb_peer(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap)
|
||||
{
|
||||
char *payload, *peer_host;
|
||||
FTB_event_properties_t ev_prop;
|
||||
|
||||
/* Only normal FTB events are supported currently. */
|
||||
ev_prop.event_type = (int) FTB_EVENT_NORMAL;
|
||||
|
||||
peer_host = NULL;
|
||||
if (peer_proc) {
|
||||
peer_host = orte_ess.proc_get_hostname(peer_proc);
|
||||
/* Ignore the peer_host for now. */
|
||||
}
|
||||
|
||||
vasprintf(&payload, msg, ap);
|
||||
if (NULL != payload) {
|
||||
extract_payload(ev_prop.event_payload, payload, FTB_MAX_PAYLOAD_DATA);
|
||||
free(payload);
|
||||
publish_ftb_event(severity, errcode, &ev_prop);
|
||||
}
|
||||
}
|
@ -1,49 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
EXTRA_DIST = orte_notifier_hnp.txt
|
||||
|
||||
sources = \
|
||||
notifier_hnp.h \
|
||||
notifier_hnp_module.c \
|
||||
notifier_hnp_recv.c \
|
||||
notifier_hnp_component.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_notifier_hnp_DSO
|
||||
component_noinst =
|
||||
component_install = mca_notifier_hnp.la
|
||||
else
|
||||
component_noinst = libmca_notifier_hnp.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_notifier_hnp_la_SOURCES = $(sources)
|
||||
mca_notifier_hnp_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_notifier_hnp_la_SOURCES =$(sources)
|
||||
libmca_notifier_hnp_la_LDFLAGS = -module -avoid-version
|
@ -1,19 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# MCA_orte_notifier_hnp_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_notifier_hnp_CONFIG], [
|
||||
AC_CONFIG_FILES([orte/mca/notifier/hnp/Makefile])
|
||||
|
||||
AS_IF([test "$orte_without_full_support" = 0],
|
||||
[$1],
|
||||
[$2])
|
||||
])
|
@ -1,51 +0,0 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
#ifndef NOTIFIER_HNP_H
|
||||
#define NOTIFIER_HNP_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/types.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
void orte_notifier_hnp_recv_cb(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata);
|
||||
|
||||
/*
|
||||
extern opal_pointer_array_t orte_notifier_hnp_tables;
|
||||
extern opal_mutex_t orte_notifier_hnp_tables_lock;
|
||||
*/
|
||||
|
||||
/*
|
||||
* Notifier interfaces
|
||||
*/
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_notifier_base_component_t mca_notifier_hnp_component;
|
||||
extern orte_notifier_base_module_t orte_notifier_hnp_module;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
@ -1,71 +0,0 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* This component proxies notification messages up to the HNP. This
|
||||
* component runs in both the HNP and non-HNP processes for ease of
|
||||
* selection (e.g., so you can "--mca notifier hnp" (vs. "--mca
|
||||
* notifier hnp,non_hnp"). It auto-detects where it is running and
|
||||
* does the Right Thing -- if it's in the HNP process, it sets up to
|
||||
* receive incoming proxied messages. If it's not in the HNP, then it
|
||||
* proxies all messages to the HNP.
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "notifier_hnp.h"
|
||||
|
||||
|
||||
static int orte_notifier_hnp_component_query(mca_base_module_t **module,
|
||||
int *priority);
|
||||
|
||||
|
||||
/*
|
||||
* Struct of function pointers that need to be initialized
|
||||
*/
|
||||
orte_notifier_base_component_t mca_notifier_hnp_component = {
|
||||
{
|
||||
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
|
||||
|
||||
"hnp", /* MCA module name */
|
||||
ORTE_MAJOR_VERSION, /* MCA module major version */
|
||||
ORTE_MINOR_VERSION, /* MCA module minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA module release version */
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
orte_notifier_hnp_component_query /* module query */
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * Component query hook: always offers the HNP notifier module with
 * priority 10.
 *
 * module    [out] receives the HNP notifier module
 * priority  [out] receives 10
 *
 * Returns ORTE_SUCCESS unconditionally.
 */
static int orte_notifier_hnp_component_query(mca_base_module_t **module,
                                             int *priority)
{
    *module = (mca_base_module_t *) &orte_notifier_hnp_module;
    *priority = 10;

    return ORTE_SUCCESS;
}
|
@ -1,205 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <string.h>
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif /* HAVE_SYS_TIME_H */
|
||||
#ifdef HAVE_HNP_H
|
||||
#include <hnp.h>
|
||||
#endif
|
||||
#ifdef HAVE_STDARG_H
|
||||
#include <stdarg.h>
|
||||
#endif
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/dss/dss_types.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "notifier_hnp.h"
|
||||
|
||||
/* Static API's */
|
||||
static int init(void);
|
||||
static void finalize(void);
|
||||
static void mylog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap);
|
||||
static void myhelplog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename, const char *topic, va_list ap);
|
||||
static void mypeerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap);
|
||||
static void myeventlog(const char *msg);
|
||||
|
||||
/* Module definition */
|
||||
orte_notifier_base_module_t orte_notifier_hnp_module = {
|
||||
init,
|
||||
finalize,
|
||||
mylog,
|
||||
myhelplog,
|
||||
mypeerlog,
|
||||
myeventlog
|
||||
};
|
||||
|
||||
/*
 * Packs a notification (severity, error code, message) into a buffer
 * and sends it to the HNP on the notifier tag.
 *
 * severity  notifier severity, sent as a uint8_t
 * errcode   error code, sent as a uint32_t
 * msg       NUL-terminated message text
 *
 * Returns ORTE_SUCCESS when the buffer was sent, ORTE_ERR_OUT_OF_RESOURCE
 * when no buffer could be allocated, the pack/send error code otherwise
 * (ORTE_ERROR when the send reports zero bytes).  The buffer is released
 * on every path.
 */
static int send_command(orte_notifier_base_severity_t severity, int errcode,
                        char *msg)
{
    opal_buffer_t *buf;
    int rc;
    /* Fixed-size types are required on the wire */
    uint8_t u8 = (uint8_t) severity;
    uint32_t u32 = (uint32_t) errcode;

    buf = OBJ_NEW(opal_buffer_t);
    if (NULL == buf) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* Pack severity, errcode and message, in that order; stop at the
       first failure. */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &u8, 1, OPAL_UINT8)) ||
        ORTE_SUCCESS != (rc = opal_dss.pack(buf, &u32, 1, OPAL_UINT32)) ||
        ORTE_SUCCESS != (rc = opal_dss.pack(buf, &msg, 1, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* Now send the buffer (rc = number of bytes sent) */
    rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, buf,
                              ORTE_RML_TAG_NOTIFIER_HNP, 0);
    if (rc <= 0) {
        ORTE_ERROR_LOG(rc);
        if (0 == rc) {
            /* Zero bytes sent is a failure; do not report it as 0,
               which callers would read as ORTE_SUCCESS. */
            rc = ORTE_ERROR;
        }
        goto cleanup;
    }
    rc = ORTE_SUCCESS;

cleanup:
    /* NOTE(review): assumes send_buffer has finished with the buffer
       when it returns (it reports the byte count synchronously) --
       confirm against the RML interface. */
    OBJ_RELEASE(buf);
    return rc;
}
|
||||
|
||||
static int init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* If I'm the HNP, post a non-blocking RML receive */
|
||||
if (ORTE_PROC_IS_HNP) {
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
||||
ORTE_RML_TAG_NOTIFIER_HNP,
|
||||
ORTE_RML_PERSISTENT,
|
||||
orte_notifier_hnp_recv_cb,
|
||||
NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void finalize(void)
|
||||
{
|
||||
/* If I'm the HNP, then cancel the non-blocking RML receive */
|
||||
if (ORTE_PROC_IS_HNP) {
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFIER_HNP);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Notifier "log" entry point: formats msg with ap, then shows the text
 * locally when running in the HNP or sends it to the HNP otherwise.
 *
 * Nothing is reported when msg is NULL or cannot be formatted.
 */
static void mylog(orte_notifier_base_severity_t severity, int errcode,
                  const char *msg, va_list ap)
{
    char *output = NULL;

    /* The result pointer is only meaningful when vasprintf succeeds, so
       its return value has to be checked as well. */
    if (NULL == msg || vasprintf(&output, msg, ap) < 0 || NULL == output) {
        return;
    }

    if (ORTE_PROC_IS_HNP) {
        /* output it locally */
        orte_show_help("orte_notifier_hnp.txt", "notifier message", false, output);
    } else {
        send_command(severity, errcode, output);
    }

    free(output);
}
|
||||
|
||||
/*
 * Notifier "help" entry point: renders the help topic to a string, then
 * shows it locally when running in the HNP or sends it to the HNP
 * otherwise.  Nothing is reported when the help text cannot be rendered.
 */
static void myhelplog(orte_notifier_base_severity_t severity, int errcode,
                      const char *filename, const char *topic, va_list ap)
{
    char *text = opal_show_help_vstring(filename, topic, false, ap);

    if (NULL == text) {
        return;
    }

    if (!ORTE_PROC_IS_HNP) {
        send_command(severity, errcode, text);
    } else {
        /* already in the HNP: show it here */
        orte_show_help("orte_notifier_hnp.txt", "notifier message", false, text);
    }

    free(text);
}
|
||||
|
||||
static void mypeerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap)
|
||||
{
|
||||
char *buf = orte_notifier_base_peer_log(errcode, peer_proc, msg, ap);
|
||||
|
||||
if (NULL != buf) {
|
||||
if (ORTE_PROC_IS_HNP) {
|
||||
/* output it locally */
|
||||
orte_show_help("orte_notifier_hnp.txt", "notifier message", false, buf);
|
||||
} else {
|
||||
send_command(severity, errcode, buf);
|
||||
}
|
||||
free(buf);
|
||||
}
|
||||
}
|
||||
|
||||
static void myeventlog(const char *msg)
|
||||
{
|
||||
if (ORTE_PROC_IS_HNP) {
|
||||
/* output it locally */
|
||||
orte_show_help("orte_notifier_hnp.txt", "notifier message", false, (char*)msg);
|
||||
} else {
|
||||
send_command(ORTE_NOTIFIER_NOTICE, ORTE_SUCCESS, (char *)msg);
|
||||
}
|
||||
}
|
@ -1,76 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/constants.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
|
||||
#include "notifier_hnp.h"
|
||||
|
||||
void orte_notifier_hnp_recv_cb(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
uint8_t u8;
|
||||
uint32_t u32;
|
||||
int rc, count;
|
||||
orte_notifier_base_severity_t severity;
|
||||
int errcode;
|
||||
char *msg;
|
||||
|
||||
/* Unpack the severity */
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = opal_dss.unpack(buffer, &u8, &count, OPAL_UINT8))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
severity = (orte_notifier_base_severity_t) u8;
|
||||
|
||||
/* Unpack the errcode */
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = opal_dss.unpack(buffer, &u32, &count, OPAL_UINT32))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
errcode = (int) u32;
|
||||
|
||||
/* Unpack the string */
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = opal_dss.unpack(buffer, &msg, &count, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
|
||||
orte_show_help("orte_notifier_hnp.txt", "notifier message", false, msg);
|
||||
|
||||
CLEAN_RETURN:
|
||||
return;
|
||||
}
|
||||
|
@ -1,36 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English help file for HNP notifier messages.
|
||||
#
|
||||
# FORMAT:
|
||||
# <severity> filename:linenum:functionname
|
||||
# error message string
|
||||
# stacktrace (optional)
|
||||
[general message]
|
||||
| |--<%s> at %s:%d:%s():
|
||||
| | %s
|
||||
#
|
||||
[msg header]
|
||||
| %s
|
||||
#
|
||||
# We marshall all the parameters into a single message when we
|
||||
# relay it to the notifier.
|
||||
[notifier message]
|
||||
%s
|
@ -1,180 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All Rights Reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file:
|
||||
*
|
||||
* The OpenRTE Notifier Framework
|
||||
*
|
||||
* The OpenRTE Notifier framework provides a mechanism for notifying
|
||||
* system administrators or other fault monitoring systems that a
|
||||
* problem with the underlying cluster has been detected - e.g., a
|
||||
* failed connection in a network fabric
|
||||
*/
|
||||
|
||||
#ifndef MCA_NOTIFIER_H
|
||||
#define MCA_NOTIFIER_H
|
||||
|
||||
/*
|
||||
* includes
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_STDARG_H
|
||||
#include <stdarg.h>
|
||||
#endif
|
||||
#ifdef HAVE_LIMITS_H
|
||||
#include <limits.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
#include <syslog.h>
|
||||
#endif
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
|
||||
#include "orte/constants.h"
|
||||
#include "orte/types.h"
|
||||
|
||||
#include "notifier_event_types.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* The maximum size of any on-stack buffers used in the notifier
|
||||
* so we can try to avoid calling malloc in OUT_OF_RESOURCES conditions.
|
||||
* The code has NOT been audited for use of malloc, so this still
|
||||
* may fail to get the "OUT_OF_RESOURCE" message out. Oh Well.
|
||||
*/
|
||||
#define ORTE_NOTIFIER_MAX_BUF 512
|
||||
|
||||
/* Severities */
|
||||
/*
 * Notifier severity levels.  Each level is defined as the syslog
 * priority constant (LOG_*) of the same name.
 */
typedef enum {
    ORTE_NOTIFIER_EMERG  = LOG_EMERG,
    ORTE_NOTIFIER_ALERT  = LOG_ALERT,
    ORTE_NOTIFIER_CRIT   = LOG_CRIT,
    ORTE_NOTIFIER_ERROR  = LOG_ERR,
    ORTE_NOTIFIER_WARN   = LOG_WARNING,
    ORTE_NOTIFIER_NOTICE = LOG_NOTICE,
    ORTE_NOTIFIER_INFO   = LOG_INFO,
    ORTE_NOTIFIER_DEBUG  = LOG_DEBUG
} orte_notifier_base_severity_t;
|
||||
|
||||
/*
|
||||
* Component functions - all MUST be provided!
|
||||
*/
|
||||
|
||||
/* initialize the selected module */
|
||||
typedef int (*orte_notifier_base_module_init_fn_t)(void);
|
||||
|
||||
/* finalize the selected module */
|
||||
typedef void (*orte_notifier_base_module_finalize_fn_t)(void);
|
||||
|
||||
/* Log a failure message */
|
||||
typedef void (*orte_notifier_base_module_log_fn_t)(orte_notifier_base_severity_t severity, int errcode, const char *msg, va_list ap)
|
||||
__opal_attribute_format_funcptr__(__printf__, 3, 0);
|
||||
|
||||
/* Log a failure that is based upon a show_help message */
|
||||
typedef void (*orte_notifier_base_module_log_show_help_fn_t)(orte_notifier_base_severity_t severity, int errcode, const char *file, const char *topic, va_list ap);
|
||||
|
||||
/* Log a failure related to a peer */
|
||||
typedef void (*orte_notifier_base_module_log_peer_fn_t)(orte_notifier_base_severity_t severity, int errcode, orte_process_name_t *peer_proc, const char *msg, va_list ap)
|
||||
__opal_attribute_format_funcptr__(__printf__, 4, 0);
|
||||
|
||||
/* Log an unusual event message */
|
||||
typedef void (*orte_notifier_base_module_log_event_fn_t)(const char *msg);
|
||||
|
||||
/*
|
||||
* Ver 1.0
|
||||
*/
|
||||
struct orte_notifier_base_module_1_0_0_t {
|
||||
orte_notifier_base_module_init_fn_t init;
|
||||
orte_notifier_base_module_finalize_fn_t finalize;
|
||||
orte_notifier_base_module_log_fn_t log;
|
||||
orte_notifier_base_module_log_show_help_fn_t help;
|
||||
orte_notifier_base_module_log_peer_fn_t peer;
|
||||
orte_notifier_base_module_log_event_fn_t log_event;
|
||||
};
|
||||
|
||||
typedef struct orte_notifier_base_module_1_0_0_t orte_notifier_base_module_1_0_0_t;
|
||||
typedef orte_notifier_base_module_1_0_0_t orte_notifier_base_module_t;
|
||||
|
||||
/*
|
||||
* API functions
|
||||
*/
|
||||
/* Log a failure message */
|
||||
typedef void (*orte_notifier_base_API_log_fn_t)(orte_notifier_base_severity_t severity, int errcode, const char *msg, ...);
|
||||
|
||||
/* Log a failure that is based upon a show_help message */
|
||||
typedef void (*orte_notifier_base_API_log_show_help_fn_t)(orte_notifier_base_severity_t severity, int errcode, const char *file, const char *topic, ...);
|
||||
|
||||
/* Log a failure related to a peer */
|
||||
typedef void (*orte_notifier_base_API_log_peer_fn_t)(orte_notifier_base_severity_t severity, int errcode, orte_process_name_t *peer_proc, const char *msg, ...);
|
||||
|
||||
/*
|
||||
* Define a struct to hold the API functions that users will call
|
||||
*/
|
||||
struct orte_notifier_API_module_1_0_0_t {
|
||||
orte_notifier_base_API_log_fn_t log;
|
||||
orte_notifier_base_API_log_show_help_fn_t show_help;
|
||||
orte_notifier_base_API_log_peer_fn_t log_peer;
|
||||
};
|
||||
typedef struct orte_notifier_API_module_1_0_0_t orte_notifier_API_module_1_0_0_t;
|
||||
typedef orte_notifier_API_module_1_0_0_t orte_notifier_API_module_t;
|
||||
|
||||
ORTE_DECLSPEC extern orte_notifier_API_module_t orte_notifier;
|
||||
|
||||
/*
|
||||
* the standard component data structure
|
||||
*/
|
||||
struct orte_notifier_base_component_1_0_0_t {
|
||||
mca_base_component_t base_version;
|
||||
mca_base_component_data_t base_data;
|
||||
};
|
||||
typedef struct orte_notifier_base_component_1_0_0_t orte_notifier_base_component_1_0_0_t;
|
||||
typedef orte_notifier_base_component_1_0_0_t orte_notifier_base_component_t;
|
||||
|
||||
|
||||
/*
|
||||
* Macro for use in components that are of type notifier v1.0.0
|
||||
*/
|
||||
#define ORTE_NOTIFIER_BASE_VERSION_1_0_0 \
|
||||
/* notifier v1.0 is chained to MCA v2.0 */ \
|
||||
MCA_BASE_VERSION_2_0_0, \
|
||||
/* notifier v1.0 */ \
|
||||
"notifier", 1, 0, 0
|
||||
|
||||
/*
|
||||
* To manage unusual events notifications
|
||||
* Set to noop if not wanted
|
||||
*/
|
||||
|
||||
#if ORTE_WANT_NOTIFIER_LOG_EVENT
|
||||
|
||||
#include "notifier_event_calls.h"
|
||||
|
||||
#else /* ORTE_WANT_NOTIFIER_LOG_EVENT */
|
||||
|
||||
#define ORTE_NOTIFIER_DEFINE_EVENT(i, m)
|
||||
#define ORTE_NOTIFIER_LOG_EVENT(i, c, t) do {} while (0)
|
||||
|
||||
#endif /* ORTE_WANT_NOTIFIER_LOG_EVENT */
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_NOTIFIER_H */
|
@ -1,170 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef ORTE_NOTIFIER_EVENTS_CALLS_H
|
||||
#define ORTE_NOTIFIER_EVENTS_CALLS_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_STDIO_H
|
||||
#include <stdio.h>
|
||||
#endif /* HAVE_STDIO_H */
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif /* HAVE_SYS_TIME_H */
|
||||
|
||||
#include "opal/class/opal_object.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
|
||||
|
||||
#define ORTE_NOTIFIER_LOG_0 0 /* Initial log format needed (no delay) */
|
||||
#define ORTE_NOTIFIER_LOG_1 1 /* Intermediate log format needed (delay) */
|
||||
#define ORTE_NOTIFIER_LOG_2 2 /* Final log format needed (at finalize) */
|
||||
|
||||
ORTE_DECLSPEC bool notifier_log_event_enabled(void);
|
||||
ORTE_DECLSPEC void notifier_event_store(orte_notifier_event_t *);
|
||||
ORTE_DECLSPEC void notifier_trace_event(int, int, int32_t, time_t, time_t,
|
||||
const char *);
|
||||
|
||||
|
||||
/*
|
||||
* Do not use this function directly: use ORTE_NOTIFIER_DEFINE_EVENT() instead
|
||||
*/
|
||||
static inline orte_notifier_event_t *notifier_alloc_event(int ev_id,
|
||||
const char *msg)
|
||||
{
|
||||
orte_notifier_event_t *ev;
|
||||
|
||||
ev = OBJ_NEW(orte_notifier_event_t);
|
||||
if (NULL == ev) {
|
||||
return NULL;
|
||||
}
|
||||
asprintf(&ev->ev_msg, msg);
|
||||
if (NULL == ev->ev_msg) {
|
||||
OBJ_RELEASE(ev);
|
||||
return NULL;
|
||||
}
|
||||
ev->ev_id = ev_id;
|
||||
/*
|
||||
* Store the allocated event into a list to be able to manage the
|
||||
* unconditional event tracing and freeing during finalize.
|
||||
*/
|
||||
notifier_event_store(ev);
|
||||
return ev;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Account for one more occurrence of an event and trace it when due.
 *
 * The occurrence counter is bumped first.  Nothing else happens until
 * the counter exceeds cnt_thresh.  Past that point the event is traced
 * immediately the first time (ORTE_NOTIFIER_LOG_0), and afterwards only
 * when more than time_thresh has elapsed since the previous trace
 * (ORTE_NOTIFIER_LOG_1).  Each trace resets the counter and records the
 * trace time.
 */
static inline void notifier_count_and_log_event(orte_notifier_event_t *ev,
                                                int ev_id,
                                                int cnt_thresh,
                                                int time_thresh)
{
    int32_t occurrences;
    time_t stamp, elapsed;

    opal_atomic_add_32(&ev->ev_cnt, 1);
    if (ev->ev_cnt <= cnt_thresh) {
        /* Not enough occurrences yet */
        return;
    }

    occurrences = ev->ev_cnt;
    stamp = time(NULL);

    if (!ev->ev_already_traced) {
        /* Very first trace: the delay is meaningless, so the current
           time is passed in its place */
        ev->ev_already_traced = 1;
        ev->ev_cnt = 0;
        ev->ev_time_trc = stamp;
        notifier_trace_event(ORTE_NOTIFIER_LOG_0, ev_id, occurrences,
                             stamp, stamp, ev->ev_msg);
        return;
    }

    if (stamp <= ev->ev_time_trc + time_thresh) {
        /* Traced too recently: keep counting */
        return;
    }

    elapsed = stamp - ev->ev_time_trc;
    ev->ev_cnt = 0;
    ev->ev_time_trc = stamp;
    notifier_trace_event(ORTE_NOTIFIER_LOG_1, ev_id, occurrences,
                         stamp, elapsed, ev->ev_msg);
}
|
||||
|
||||
|
||||
#define notifier_event_fn_prefix(i) notifier_log_event_ ## i
|
||||
|
||||
/*
|
||||
* This macro should be called each time a new event will be traced.
|
||||
* It expands to a static inline function suffixed by the event number.
|
||||
*/
|
||||
#define ORTE_NOTIFIER_DEFINE_EVENT(i, m) \
|
||||
static inline void notifier_event_fn_prefix(i) (int c_thr, int t_thr) \
|
||||
{ \
|
||||
static orte_notifier_event_t *prefix_ ## i = NULL; \
|
||||
if (!notifier_log_event_enabled()) { \
|
||||
return; \
|
||||
} \
|
||||
if (NULL == prefix_ ## i) { \
|
||||
prefix_ ## i = notifier_alloc_event(i, m); \
|
||||
if (NULL == prefix_ ## i) { \
|
||||
return; \
|
||||
} \
|
||||
} \
|
||||
notifier_count_and_log_event(prefix_ ## i, i, c_thr, t_thr); \
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the log interface that should be called whenever an unusual event
|
||||
* should be warned about.
|
||||
* The event should have been defined before, using
|
||||
* ORTE_NOTIFIER_DEFINE_EVENT():
|
||||
*
|
||||
* (1) Event definition:
|
||||
*
|
||||
* Typically in a header file call the following:
|
||||
* ORTE_NOTIFIER_DEFINE_EVENT(0, "message 0")
|
||||
* This macro expands to
|
||||
* static inline void notifier_log_event_0(int c_thr, int t_thr)
|
||||
* {
|
||||
* static orte_notifier_event_t *prefix_0 = NULL;
|
||||
* if (!notifier_log_event_enabled()) {
|
||||
* return;
|
||||
* }
|
||||
* if (NULL == prefix_0) {
|
||||
* prefix_0 = notifier_alloc_event(0, "message 0");
|
||||
* if (NULL == prefix_0) {
|
||||
* return;
|
||||
* }
|
||||
* }
|
||||
* notifier_count_and_log_event(prefix_0, 0, c_thr, t_thr);
|
||||
* }
|
||||
*
|
||||
* (2) Event accounting and tracing:
|
||||
*
|
||||
* Whenever you want to trace the unusual event whose id is 0, just call:
|
||||
* ORTE_NOTIFIER_LOG_EVENT(0, 100, 1);
|
||||
* 100 and 1 are respectively the counter and time thresholds.
|
||||
* This actually expands to
|
||||
* notifier_log_event_0(100, 1);
|
||||
*/
|
||||
#define ORTE_NOTIFIER_LOG_EVENT(i, c, t) notifier_event_fn_prefix(i) (c, t)
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTE_NOTIFIER_EVENT_CALLS_H */
|
@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef ORTE_NOTIFIER_BASE_EVENTS_H
|
||||
#define ORTE_NOTIFIER_BASE_EVENTS_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif /* HAVE_SYS_TIME_H */
|
||||
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/class/opal_object.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
volatile int32_t ev_cnt;
|
||||
int ev_id;
|
||||
int ev_already_traced;
|
||||
time_t ev_time_trc;
|
||||
char *ev_msg;
|
||||
} orte_notifier_event_t;
|
||||
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_notifier_event_t);
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTE_NOTIFIER_BASE_EVENTS_H */
|
@ -1,51 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CPPFLAGS = $(notifier_smtp_CPPFLAGS)
|
||||
|
||||
dist_pkgdata_DATA = \
|
||||
help-orte-notifier-smtp.txt
|
||||
|
||||
sources = \
|
||||
notifier_smtp.h \
|
||||
notifier_smtp_module.c \
|
||||
notifier_smtp_component.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_notifier_smtp_DSO
|
||||
component_noinst =
|
||||
component_install = mca_notifier_smtp.la
|
||||
else
|
||||
component_noinst = libmca_notifier_smtp.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_notifier_smtp_la_SOURCES = $(sources)
|
||||
mca_notifier_smtp_la_LDFLAGS = -module -avoid-version $(notifier_smtp_LDFLAGS)
|
||||
mca_notifier_smtp_la_LIBADD = $(notifier_smtp_LIBS)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_notifier_smtp_la_SOURCES =$(sources)
|
||||
libmca_notifier_smtp_la_LDFLAGS = -module -avoid-version $(notifier_smtp_LDFLAGS)
|
||||
libmca_notifier_smtp_la_LIBADD = $(notifier_smtp_LIBS)
|
@ -1,41 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_notifier_smtp_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_notifier_smtp_CONFIG], [
|
||||
AC_CONFIG_FILES([orte/mca/notifier/smtp/Makefile])
|
||||
|
||||
OPAL_SETUP_COMPONENT_PACKAGE([notifier],
|
||||
[smtp],
|
||||
[esmtp],
|
||||
[include/libesmtp.h],
|
||||
[libesmtp*],
|
||||
[libesmtp.h],
|
||||
[esmtp],
|
||||
[smtp_create_session],
|
||||
[],
|
||||
[orte_notifier_want_smtp=1],
|
||||
[orte_notifier_want_smtp=0])
|
||||
|
||||
AS_IF([test "$orte_notifier_want_smtp" = 1 -a "$orte_without_full_support" = 0],
|
||||
[$1],
|
||||
[$2])
|
||||
])dnl
|
@ -1,33 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English help file for Open MPI's SMTP notifier support
|
||||
#
|
||||
[to/from not specified]
|
||||
Error: the Open MPI SMTP notifier component had no "to" and/or "from"
|
||||
email addresses specified.
|
||||
#
|
||||
[server not specified]
|
||||
Error: the Open MPI SMTP notifier component had no SMTP server name or
|
||||
IP address specified.
|
||||
#
|
||||
[unable to resolve server]
|
||||
Sorry, Open MPI's SMTP notifier component was unable to resolve the IP
|
||||
address of the server provided.
|
||||
|
||||
Server: %s
|
||||
#
|
||||
[send_email failed]
|
||||
Oops! Open MPI's SMTP notifier failed to send an email.
|
||||
|
||||
Reason: %s
|
||||
libESMTP function: %s
|
||||
libESMTP message: %s
|
||||
Message: %s
|
||||
#
|
@ -1,64 +0,0 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
#ifndef NOTIFIER_SMTP_H
|
||||
#define NOTIFIER_SMTP_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <netdb.h>
|
||||
|
||||
#include "libesmtp.h"
|
||||
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef struct {
|
||||
orte_notifier_base_component_t super;
|
||||
|
||||
/* SMTP server name and port */
|
||||
char *server;
|
||||
int port;
|
||||
|
||||
/* To, From, Subject */
|
||||
char *to, **to_argv, *from_name, *from_addr, *subject;
|
||||
|
||||
/* Mail body prefix and suffix */
|
||||
char *body_prefix, *body_suffix;
|
||||
|
||||
/* struct hostent from resolved SMTP server name */
|
||||
struct hostent *server_hostent;
|
||||
|
||||
/* Priority of this component */
|
||||
int priority;
|
||||
} orte_notifier_smtp_component_t;
|
||||
|
||||
|
||||
/*
|
||||
* Notifier interfaces
|
||||
*/
|
||||
ORTE_MODULE_DECLSPEC extern orte_notifier_smtp_component_t
|
||||
mca_notifier_smtp_component;
|
||||
extern orte_notifier_base_module_t orte_notifier_smtp_module;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
@ -1,229 +0,0 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Simple smtp notifier (using libesmtp)
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/constants.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "notifier_smtp.h"
|
||||
|
||||
static int smtp_component_query(mca_base_module_t **module, int *priority);
|
||||
static int smtp_close(void);
|
||||
static int smtp_register(void);
|
||||
|
||||
/*
|
||||
* Struct of function pointers that need to be initialized
|
||||
*/
|
||||
orte_notifier_smtp_component_t mca_notifier_smtp_component = {
|
||||
{
|
||||
{
|
||||
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
|
||||
|
||||
"smtp",
|
||||
|
||||
ORTE_MAJOR_VERSION,
|
||||
ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION,
|
||||
|
||||
NULL,
|
||||
smtp_close,
|
||||
smtp_component_query,
|
||||
smtp_register,
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
},
|
||||
|
||||
/* SMTP server and port */
|
||||
"localhost",
|
||||
25,
|
||||
|
||||
/* To, from, subject */
|
||||
NULL,
|
||||
NULL,
|
||||
"Open MPI Notifier",
|
||||
NULL,
|
||||
"Open MPI notifier",
|
||||
|
||||
/* Mail body prefix, suffix */
|
||||
"The Open MPI SMTP notifier wishes to inform you of the following message:\n\n",
|
||||
"\n\nSincerely,\nOscar the Open MPI Owl",
|
||||
|
||||
/* Struct hostent */
|
||||
NULL,
|
||||
|
||||
/* Priority */
|
||||
10,
|
||||
};
|
||||
|
||||
static int smtp_register(void)
|
||||
{
|
||||
char version[256];
|
||||
|
||||
/* Server stuff */
|
||||
mca_base_param_reg_string(&mca_notifier_smtp_component.super.base_version,
|
||||
"server",
|
||||
"SMTP server name or IP address",
|
||||
false, false,
|
||||
mca_notifier_smtp_component.server,
|
||||
&mca_notifier_smtp_component.server);
|
||||
mca_base_param_reg_int(&mca_notifier_smtp_component.super.base_version,
|
||||
"port",
|
||||
"SMTP server port",
|
||||
false, false,
|
||||
mca_notifier_smtp_component.port,
|
||||
&mca_notifier_smtp_component.port);
|
||||
|
||||
/* Email stuff */
|
||||
mca_base_param_reg_string(&mca_notifier_smtp_component.super.base_version,
|
||||
"to",
|
||||
"Comma-delimited list of email addresses to send to",
|
||||
false, false,
|
||||
mca_notifier_smtp_component.to,
|
||||
&mca_notifier_smtp_component.to);
|
||||
mca_base_param_reg_string(&mca_notifier_smtp_component.super.base_version,
|
||||
"from_addr",
|
||||
"Email address that messages will be from",
|
||||
false, false,
|
||||
mca_notifier_smtp_component.from_addr,
|
||||
&mca_notifier_smtp_component.from_addr);
|
||||
mca_base_param_reg_string(&mca_notifier_smtp_component.super.base_version,
|
||||
"from_name",
|
||||
"Email name that messages will be from",
|
||||
false, false,
|
||||
mca_notifier_smtp_component.from_name,
|
||||
&mca_notifier_smtp_component.from_name);
|
||||
mca_base_param_reg_string(&mca_notifier_smtp_component.super.base_version,
|
||||
"subject",
|
||||
"Email subject",
|
||||
false, false,
|
||||
mca_notifier_smtp_component.subject,
|
||||
&mca_notifier_smtp_component.subject);
|
||||
|
||||
/* Mail body prefix and suffix */
|
||||
mca_base_param_reg_string(&mca_notifier_smtp_component.super.base_version,
|
||||
"body_prefix",
|
||||
"Text to put at the beginning of the mail message",
|
||||
false, false,
|
||||
mca_notifier_smtp_component.body_prefix,
|
||||
&mca_notifier_smtp_component.body_prefix);
|
||||
mca_base_param_reg_string(&mca_notifier_smtp_component.super.base_version,
|
||||
"body_suffix",
|
||||
"Text to put at the beginning of the mail message",
|
||||
false, false,
|
||||
mca_notifier_smtp_component.body_suffix,
|
||||
&mca_notifier_smtp_component.body_suffix);
|
||||
|
||||
/* Priority */
|
||||
mca_base_param_reg_int(&mca_notifier_smtp_component.super.base_version,
|
||||
"priority",
|
||||
"Priority of this component",
|
||||
false, false,
|
||||
mca_notifier_smtp_component.priority,
|
||||
&mca_notifier_smtp_component.priority);
|
||||
|
||||
/* Libesmtp version */
|
||||
smtp_version(version, sizeof(version), 0);
|
||||
version[sizeof(version) - 1] = '\0';
|
||||
mca_base_param_reg_string(&mca_notifier_smtp_component.super.base_version,
|
||||
"libesmtp_version",
|
||||
"Version of libesmtp that this component is linked against",
|
||||
false, true, version, NULL);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int smtp_close(void)
|
||||
{
|
||||
if (NULL != mca_notifier_smtp_component.server) {
|
||||
free(mca_notifier_smtp_component.server);
|
||||
}
|
||||
|
||||
if (NULL != mca_notifier_smtp_component.to) {
|
||||
free(mca_notifier_smtp_component.to);
|
||||
}
|
||||
if (NULL != mca_notifier_smtp_component.from_name) {
|
||||
free(mca_notifier_smtp_component.from_name);
|
||||
}
|
||||
if (NULL != mca_notifier_smtp_component.from_addr) {
|
||||
free(mca_notifier_smtp_component.from_addr);
|
||||
}
|
||||
if (NULL != mca_notifier_smtp_component.subject) {
|
||||
free(mca_notifier_smtp_component.subject);
|
||||
}
|
||||
if (NULL != mca_notifier_smtp_component.body_prefix) {
|
||||
free(mca_notifier_smtp_component.body_prefix);
|
||||
}
|
||||
if (NULL != mca_notifier_smtp_component.body_suffix) {
|
||||
free(mca_notifier_smtp_component.body_suffix);
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Component query hook: decide whether the SMTP notifier can be used.
 *
 * The component is only usable when a recipient ("to"), a sender
 * address ("from_addr") and a server name are configured and the server
 * name can be resolved.  A help message is shown for whichever check
 * fails.
 *
 * @param module    set to the SMTP notifier module on success, NULL
 *                  otherwise
 * @param priority  set to the configured component priority on success,
 *                  0 otherwise
 *
 * @return ORTE_SUCCESS if the component is usable, ORTE_ERR_NOT_FOUND
 *         otherwise.
 */
static int smtp_component_query(mca_base_module_t **module,
                                int *priority)
{
    *priority = 0;
    *module = NULL;

    /* If there's no to or from, there's no love */
    if (NULL == mca_notifier_smtp_component.to ||
        '\0' == mca_notifier_smtp_component.to[0] ||
        NULL == mca_notifier_smtp_component.from_addr ||
        '\0' == mca_notifier_smtp_component.from_addr[0]) {
        orte_show_help("help-orte-notifier-smtp.txt",
                       "to/from not specified", true);
        return ORTE_ERR_NOT_FOUND;
    }

    /* Sanity checks */
    if (NULL == mca_notifier_smtp_component.server ||
        '\0' == mca_notifier_smtp_component.server[0]) {
        orte_show_help("help-orte-notifier-smtp.txt",
                       "server not specified", true);
        return ORTE_ERR_NOT_FOUND;
    }

    /* Since we have to open a socket later, try to resolve the IP
       address of the server now.  Save the result, or abort if we
       can't resolve it. */
    mca_notifier_smtp_component.server_hostent =
        gethostbyname(mca_notifier_smtp_component.server);
    if (NULL == mca_notifier_smtp_component.server_hostent) {
        orte_show_help("help-orte-notifier-smtp.txt",
                       "unable to resolve server",
                       true, mca_notifier_smtp_component.server);
        return ORTE_ERR_NOT_FOUND;
    }

    /* Report the priority the user configured through the "priority"
       MCA parameter (default 10) instead of a hard-coded constant, so
       that the parameter actually takes effect. */
    *priority = mca_notifier_smtp_component.priority;
    *module = (mca_base_module_t *)&orte_notifier_smtp_module;
    return ORTE_SUCCESS;
}
|
@ -1,354 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Send an email upon notifier events.
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifdef HAVE_STDARG_H
|
||||
#include <stdarg.h>
|
||||
#endif
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SIGNAL_H
|
||||
#include <signal.h>
|
||||
#endif
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/argv.h"
|
||||
|
||||
#include "orte/constants.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/util/error_strings.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
#include "notifier_smtp.h"
|
||||
|
||||
|
||||
/* Static API's */
|
||||
static void mylog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap);
|
||||
static void myhelplog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename,
|
||||
const char *topic, va_list ap);
|
||||
static void mypeerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc,
|
||||
const char *msg, va_list ap);
|
||||
|
||||
/* Module */
|
||||
orte_notifier_base_module_t orte_notifier_smtp_module = {
|
||||
NULL,
|
||||
NULL,
|
||||
mylog,
|
||||
myhelplog,
|
||||
mypeerlog,
|
||||
NULL
|
||||
};
|
||||
|
||||
/*
 * Progress marker for the message callback: records which part of the
 * mail was handed out last.
 */
typedef enum {
    SENT_NONE,          /* nothing handed out yet */
    SENT_HEADER,        /* blank line ending the header */
    SENT_BODY_PREFIX,   /* configured body prefix */
    SENT_BODY,          /* the message itself */
    SENT_BODY_SUFFIX,   /* configured body suffix */
    SENT_ALL
} sent_flag_t;
|
||||
|
||||
typedef struct {
|
||||
sent_flag_t sent_flag;
|
||||
char *msg;
|
||||
char *prev_string;
|
||||
} message_status_t;
|
||||
|
||||
/*
|
||||
* Convert lone \n's to \r\n
|
||||
*/
|
||||
/*
 * Return a newly allocated copy of orig in which every lone '\n' (one
 * that is not already preceded by '\r') is turned into "\r\n", so that
 * the text uses the CRLF line endings mail bodies require.
 *
 * @param orig  NUL-terminated text to convert (not modified)
 *
 * @return malloc'ed converted string that the caller must free, or NULL
 *         if orig is NULL or memory cannot be allocated.
 */
static char *crnl(char *orig)
{
    size_t i, j, len, extra;
    char *str;

    if (NULL == orig) {
        return NULL;
    }

    /* Count how much space we need: one extra byte per lone '\n'.
       A '\n' at the very start of the string counts as lone too. */
    len = strlen(orig);
    extra = 0;
    for (i = 0; i < len; ++i) {
        if ('\n' == orig[i] && (0 == i || '\r' != orig[i - 1])) {
            ++extra;
        }
    }

    str = malloc(len + extra + 1);
    if (NULL == str) {
        return NULL;
    }

    /* Copy, changing \n to \r\n */
    for (i = j = 0; i < len; ++i) {
        if ('\n' == orig[i] && (0 == i || '\r' != orig[i - 1])) {
            str[j++] = '\r';
        }
        str[j++] = orig[i];
    }
    str[j] = '\0';
    return str;
}
|
||||
|
||||
/*
|
||||
* Callback function invoked via smtp_start_session()
|
||||
*/
|
||||
static const char *message_cb(void **buf, int *len, void *arg)
|
||||
{
|
||||
message_status_t *ms = (message_status_t*) arg;
|
||||
|
||||
if (NULL == *buf) {
|
||||
*buf = malloc(8192);
|
||||
}
|
||||
if (NULL == len) {
|
||||
ms->sent_flag = SENT_NONE;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Free the previous string */
|
||||
if (NULL != ms->prev_string) {
|
||||
free(ms->prev_string);
|
||||
ms->prev_string = NULL;
|
||||
}
|
||||
|
||||
switch (ms->sent_flag) {
|
||||
case SENT_NONE:
|
||||
/* Send a blank line to signify the end of the header */
|
||||
ms->sent_flag = SENT_HEADER;
|
||||
ms->prev_string = NULL;
|
||||
*len = 2;
|
||||
return "\r\n";
|
||||
|
||||
case SENT_HEADER:
|
||||
if (NULL != mca_notifier_smtp_component.body_prefix) {
|
||||
ms->sent_flag = SENT_BODY_PREFIX;
|
||||
ms->prev_string = crnl(mca_notifier_smtp_component.body_prefix);
|
||||
*len = strlen(ms->prev_string);
|
||||
return ms->prev_string;
|
||||
}
|
||||
|
||||
case SENT_BODY_PREFIX:
|
||||
ms->sent_flag = SENT_BODY;
|
||||
ms->prev_string = crnl(ms->msg);
|
||||
*len = strlen(ms->prev_string);
|
||||
return ms->prev_string;
|
||||
|
||||
case SENT_BODY:
|
||||
if (NULL != mca_notifier_smtp_component.body_suffix) {
|
||||
ms->sent_flag = SENT_BODY_SUFFIX;
|
||||
ms->prev_string = crnl(mca_notifier_smtp_component.body_suffix);
|
||||
*len = strlen(ms->prev_string);
|
||||
return ms->prev_string;
|
||||
}
|
||||
|
||||
case SENT_BODY_SUFFIX:
|
||||
case SENT_ALL:
|
||||
default:
|
||||
ms->sent_flag = SENT_ALL;
|
||||
*len = 0;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Back-end function to actually send the email
|
||||
*/
|
||||
static int send_email(char *msg)
|
||||
{
|
||||
int i, err = ORTE_SUCCESS;
|
||||
char *str = NULL;
|
||||
char *errmsg = NULL;
|
||||
struct sigaction sig, oldsig;
|
||||
bool set_oldsig = false;
|
||||
smtp_session_t session = NULL;
|
||||
smtp_message_t message = NULL;
|
||||
message_status_t ms;
|
||||
orte_notifier_smtp_component_t *c = &mca_notifier_smtp_component;
|
||||
|
||||
if (NULL == c->to_argv) {
|
||||
c->to_argv = opal_argv_split(c->to, ',');
|
||||
if (NULL == c->to_argv ||
|
||||
NULL == c->to_argv[0]) {
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
|
||||
ms.sent_flag = SENT_NONE;
|
||||
ms.prev_string = NULL;
|
||||
ms.msg = msg;
|
||||
|
||||
/* Temporarily disable SIGPIPE so that if remote servers timeout
|
||||
or hang up on us, it doesn't kill this application. We'll
|
||||
restore the original SIGPIPE handler when we're done. */
|
||||
sig.sa_handler = SIG_IGN;
|
||||
sigemptyset(&sig.sa_mask);
|
||||
sig.sa_flags = 0;
|
||||
sigaction(SIGPIPE, &sig, &oldsig);
|
||||
set_oldsig = true;
|
||||
|
||||
/* Try to get a libesmtp session. If so, assume that libesmtp is
|
||||
happy and proceeed */
|
||||
session = smtp_create_session();
|
||||
if (NULL == session) {
|
||||
err = ORTE_ERR_NOT_SUPPORTED;
|
||||
errmsg = "stmp_create_session";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Create the message */
|
||||
message = smtp_add_message(session);
|
||||
if (NULL == message) {
|
||||
err = ORTE_ERROR;
|
||||
errmsg = "stmp_add_message";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Set the SMTP server (yes, it's a weird return status!) */
|
||||
asprintf(&str, "%s:%d", c->server, c->port);
|
||||
if (0 == smtp_set_server(session, str)) {
|
||||
err = ORTE_ERROR;
|
||||
errmsg = "stmp_set_server";
|
||||
goto error;
|
||||
}
|
||||
free(str);
|
||||
str = NULL;
|
||||
|
||||
/* Add the sender */
|
||||
if (0 == smtp_set_reverse_path(message, c->from_addr)) {
|
||||
err = ORTE_ERROR;
|
||||
errmsg = "stmp_set_reverse_path";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Set the subject and some headers */
|
||||
asprintf(&str, "Open MPI SMTP Notifier v%d.%d.%d",
|
||||
c->super.base_version.mca_component_major_version,
|
||||
c->super.base_version.mca_component_minor_version,
|
||||
c->super.base_version.mca_component_release_version);
|
||||
if (0 == smtp_set_header(message, "Subject", c->subject) ||
|
||||
0 == smtp_set_header_option(message, "Subject", Hdr_OVERRIDE, 1) ||
|
||||
0 == smtp_set_header(message, "To", NULL, NULL) ||
|
||||
0 == smtp_set_header(message, "From",
|
||||
(NULL != c->from_name ?
|
||||
c->from_name : c->from_addr),
|
||||
c->from_addr) ||
|
||||
0 == smtp_set_header(message, "X-Mailer", str) ||
|
||||
0 == smtp_set_header_option(message, "Subject", Hdr_OVERRIDE, 1)) {
|
||||
err = ORTE_ERROR;
|
||||
errmsg = "smtp_set_header";
|
||||
goto error;
|
||||
}
|
||||
free(str);
|
||||
str = NULL;
|
||||
|
||||
/* Add the recipients */
|
||||
for (i = 0; NULL != c->to_argv[i]; ++i) {
|
||||
if (NULL == smtp_add_recipient(message, c->to_argv[i])) {
|
||||
err = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
errmsg = "stmp_add_recipient";
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
/* Set the callback to get the message */
|
||||
if (0 == smtp_set_messagecb(message, message_cb, &ms)) {
|
||||
err = ORTE_ERROR;
|
||||
errmsg = "smtp_set_messagecb";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Send it! */
|
||||
if (0 == smtp_start_session(session)) {
|
||||
err = ORTE_ERROR;
|
||||
errmsg = "smtp_start_session";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Fall through */
|
||||
|
||||
error:
|
||||
if (NULL != str) {
|
||||
free(str);
|
||||
}
|
||||
if (NULL != session) {
|
||||
smtp_destroy_session(session);
|
||||
}
|
||||
/* Restore the SIGPIPE handler */
|
||||
if (set_oldsig) {
|
||||
sigaction(SIGPIPE, &oldsig, NULL);
|
||||
}
|
||||
if (ORTE_SUCCESS != err) {
|
||||
int e;
|
||||
char em[256];
|
||||
|
||||
e = smtp_errno();
|
||||
smtp_strerror(e, em, sizeof(em));
|
||||
orte_show_help("help-orte-notifier-smtp.txt",
|
||||
"send_email failed",
|
||||
true, "libesmtp library call failed",
|
||||
errmsg, em, e, msg);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Format msg with the arguments in ap and email the result.
 *
 * severity and errcode are accepted for interface compatibility and are
 * not used.  Nothing is sent if msg is NULL or formatting fails.
 */
static void mylog(orte_notifier_base_severity_t severity, int errcode,
                  const char *msg, va_list ap)
{
    char *output = NULL;

    /* If there was a message, output it.  The contents of output are
       undefined when vasprintf fails, so rely on its return value rather
       than on the pointer. */
    if (NULL == msg || 0 > vasprintf(&output, msg, ap)) {
        return;
    }

    send_email(output);
    free(output);
}
|
||||
|
||||
/*
 * Render the help topic `topic` from `filename` with the arguments in ap
 * and email the resulting text.  Does nothing if no text is produced.
 * severity and errcode are not used.
 */
static void myhelplog(orte_notifier_base_severity_t severity, int errcode,
                      const char *filename,
                      const char *topic, va_list ap)
{
    char *text;

    text = opal_show_help_vstring(filename, topic, false, ap);
    if (NULL == text) {
        return;
    }

    send_email(text);
    free(text);
}
|
||||
|
||||
static void mypeerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap)
|
||||
{
|
||||
char *buf = orte_notifier_base_peer_log(errcode, peer_proc, msg, ap);
|
||||
|
||||
if (NULL != buf) {
|
||||
send_email(buf);
|
||||
free(buf);
|
||||
}
|
||||
}
|
@ -1,44 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
sources = \
|
||||
notifier_syslog.h \
|
||||
notifier_syslog_module.c \
|
||||
notifier_syslog_component.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_notifier_syslog_DSO
|
||||
component_noinst =
|
||||
component_install = mca_notifier_syslog.la
|
||||
else
|
||||
component_noinst = libmca_notifier_syslog.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_notifier_syslog_la_SOURCES = $(sources)
|
||||
mca_notifier_syslog_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_notifier_syslog_la_SOURCES =$(sources)
|
||||
libmca_notifier_syslog_la_LDFLAGS = -module -avoid-version
|
@ -1,19 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# MCA_notifier_syslog_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_notifier_syslog_CONFIG], [
|
||||
AC_CONFIG_FILES([orte/mca/notifier/syslog/Makefile])
|
||||
|
||||
AS_IF([test "$orte_without_full_support" = 0],
|
||||
[$1],
|
||||
[$2])
|
||||
])
|
@ -1,39 +0,0 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
#ifndef NOTIFIER_SYSLOG_H
|
||||
#define NOTIFIER_SYSLOG_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* Notifier interfaces
|
||||
*/
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_notifier_base_component_t mca_notifier_syslog_component;
|
||||
extern orte_notifier_base_module_t orte_notifier_syslog_module;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
@ -1,63 +0,0 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file:
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* includes
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "notifier_syslog.h"
|
||||
|
||||
|
||||
static int orte_notifier_syslog_component_query(mca_base_module_t **module,
|
||||
int *priority);
|
||||
|
||||
/*
|
||||
* Struct of function pointers that need to be initialized
|
||||
*/
|
||||
orte_notifier_base_component_t mca_notifier_syslog_component = {
|
||||
{
|
||||
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
|
||||
|
||||
"syslog", /* MCA module name */
|
||||
ORTE_MAJOR_VERSION, /* MCA module major version */
|
||||
ORTE_MINOR_VERSION, /* MCA module minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA module release version */
|
||||
NULL,
|
||||
NULL,
|
||||
orte_notifier_syslog_component_query /* module query */
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * Module query: always offers the syslog module, with priority 1.
 *
 * module:   set to point at orte_notifier_syslog_module
 * priority: set to 1
 *
 * Always returns ORTE_SUCCESS.
 */
static int orte_notifier_syslog_component_query(mca_base_module_t **module,
                                                int *priority)
{
    *module = (mca_base_module_t *) &orte_notifier_syslog_module;
    *priority = 1;

    return ORTE_SUCCESS;
}
|
@ -1,127 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <string.h>
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif /* HAVE_SYS_TIME_H */
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
#include <syslog.h>
|
||||
#endif
|
||||
#ifdef HAVE_STDARG_H
|
||||
#include <stdarg.h>
|
||||
#endif
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "orte/util/error_strings.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "notifier_syslog.h"
|
||||
|
||||
|
||||
/* Static API's */
|
||||
static int init(void);
|
||||
static void finalize(void);
|
||||
static void mylog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap);
|
||||
static void myhelplog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename, const char *topic, va_list ap);
|
||||
static void mypeerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap);
|
||||
static void myeventlog(const char *msg);
|
||||
|
||||
/* Module def */
|
||||
orte_notifier_base_module_t orte_notifier_syslog_module = {
|
||||
init,
|
||||
finalize,
|
||||
mylog,
|
||||
myhelplog,
|
||||
mypeerlog,
|
||||
myeventlog
|
||||
};
|
||||
|
||||
|
||||
static int init(void)
|
||||
{
|
||||
int opts;
|
||||
|
||||
opts = LOG_CONS | LOG_PID;
|
||||
openlog("Open MPI Error Report:", opts, LOG_USER);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Close the syslog connection opened by init().
 */
static void finalize(void)
{
    closelog();
    return;
}
|
||||
|
||||
/*
 * Format msg with the arguments in ap and write it to syslog at the
 * given severity.  errcode is not used.
 *
 * When vsyslog() is unavailable the message is formatted first and then
 * logged through a fixed "%s" format: the formatted text may itself
 * contain '%' characters and must never be used as a format string.
 */
static void mylog(orte_notifier_base_severity_t severity, int errcode,
                  const char *msg, va_list ap)
{
    /* If there was a message, output it */
#if defined(HAVE_VSYSLOG)
    vsyslog(severity, msg, ap);
#else
    char *output = NULL;

    /* output is undefined if vasprintf fails; only use it on success */
    if (0 > vasprintf(&output, msg, ap)) {
        return;
    }
    syslog(severity, "%s", output);
    free(output);
#endif
}
|
||||
|
||||
/*
 * Render the help topic `topic` from `filename` with the arguments in ap
 * and write the resulting text to syslog at the given severity.
 * errcode is not used.
 *
 * The rendered text is logged through a fixed "%s" format so that any
 * '%' characters it contains are not interpreted as conversions.
 */
static void myhelplog(orte_notifier_base_severity_t severity, int errcode,
                      const char *filename, const char *topic, va_list ap)
{
    char *output = opal_show_help_vstring(filename, topic, false, ap);

    /* if nothing came back, then nothing to do */
    if (NULL == output) {
        return;
    }

    /* go ahead and output it */
    syslog(severity, "%s", output);
    free(output);
}
|
||||
|
||||
static void mypeerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap)
|
||||
{
|
||||
char *buf = orte_notifier_base_peer_log(errcode, peer_proc, msg, ap);
|
||||
|
||||
if (NULL != buf) {
|
||||
syslog(severity, buf, NULL);
|
||||
free(buf);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Write an event message to syslog with priority LOG_LOCAL0 | LOG_NOTICE.
 *
 * msg is logged verbatim through a fixed "%s" format; it is caller
 * supplied text and must not be interpreted as a format string.
 * Nothing is logged if msg is NULL.
 */
static void myeventlog(const char *msg)
{
    /* If there was a message, output it */
    if (NULL == msg) {
        return;
    }
    syslog(LOG_LOCAL0 | LOG_NOTICE, "%s", msg);
}
|
||||
|
@ -64,7 +64,6 @@
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#include "oob_tcp.h"
|
||||
@ -641,10 +640,6 @@ void mca_oob_tcp_peer_shutdown(mca_oob_tcp_peer_t* peer)
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
(NULL == host) ? "NULL" : host);
|
||||
/* provide a notifier message */
|
||||
orte_notifier.log_peer(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE, &(peer->peer_name),
|
||||
"OOB connection retries exceeded. Cannot communicate with peer %s.",
|
||||
ORTE_JOBID_PRINT(peer->peer_name.jobid));
|
||||
|
||||
/* There are cases during the initial connection setup where
|
||||
the peer_send_msg is NULL but there are things in the queue
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/mca/routed/routed.h"
|
||||
|
||||
|
@ -57,7 +57,6 @@
|
||||
#include "orte/mca/filem/filem.h"
|
||||
#include "orte/mca/filem/base/base.h"
|
||||
#include "orte/mca/grpcomm/base/base.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
#include "orte/mca/sensor/sensor.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
@ -121,9 +121,6 @@ BEGIN_C_DECLS
|
||||
/* tag for receiving heartbeats */
|
||||
#define ORTE_RML_TAG_HEARTBEAT 39
|
||||
|
||||
/* notifier data */
|
||||
#define ORTE_RML_TAG_NOTIFIER_HNP 40
|
||||
|
||||
/* Process Migration Tool Tag */
|
||||
#define ORTE_RML_TAG_MIGRATE 43
|
||||
|
||||
|
@ -55,7 +55,6 @@
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
#include "orte/mca/sstore/sstore.h"
|
||||
#include "orte/mca/sstore/base/base.h"
|
||||
@ -418,34 +417,29 @@ static void snapc_none_global_cmdline_request(int status,
|
||||
* Utility functions
|
||||
********************/
|
||||
|
||||
/* Report the checkpoint status over the notifier interface */
|
||||
/* Report the checkpoint status */
|
||||
void orte_snapc_ckpt_state_notify(int state)
|
||||
{
|
||||
switch(state) {
|
||||
case ORTE_SNAPC_CKPT_STATE_ESTABLISHED:
|
||||
orte_notifier.log(ORTE_NOTIFIER_INFO, ORTE_SNAPC_CKPT_NOTIFY(state),
|
||||
"%d: Checkpoint established for process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
opal_output(0, "%d: Checkpoint established for process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
break;
|
||||
case ORTE_SNAPC_CKPT_STATE_NO_CKPT:
|
||||
orte_notifier.log(ORTE_NOTIFIER_WARN, ORTE_SNAPC_CKPT_NOTIFY(state),
|
||||
"%d: Process %s is not checkpointable.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
opal_output(0, "%d: Process %s is not checkpointable.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
break;
|
||||
case ORTE_SNAPC_CKPT_STATE_ERROR:
|
||||
orte_notifier.log(ORTE_NOTIFIER_WARN, ORTE_SNAPC_CKPT_NOTIFY(state),
|
||||
"%d: Failed to checkpoint process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
opal_output(0, "%d: Failed to checkpoint process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
break;
|
||||
case ORTE_SNAPC_CKPT_STATE_RECOVERED:
|
||||
orte_notifier.log(ORTE_NOTIFIER_INFO, ORTE_SNAPC_CKPT_NOTIFY(state),
|
||||
"%d: Successfully restarted process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
opal_output(0, "%d: Successfully restarted process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
break;
|
||||
case ORTE_SNAPC_CKPT_STATE_NO_RESTART:
|
||||
orte_notifier.log(ORTE_NOTIFIER_WARN, ORTE_SNAPC_CKPT_NOTIFY(state),
|
||||
"%d: Failed to restart process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
opal_output(0, "%d: Failed to restart process %s.",
|
||||
orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid));
|
||||
break;
|
||||
/* ADK: We currently do not notify for these states, but good to
|
||||
* have them around anyways. */
|
||||
@ -677,7 +671,7 @@ int orte_snapc_base_global_coord_ckpt_update_cmd(orte_process_name_t* peer,
|
||||
}
|
||||
|
||||
/*
|
||||
* Pass on the checkpoint state over the notifier interface.
|
||||
* Pass on the checkpoint state.
|
||||
*/
|
||||
orte_snapc_ckpt_state_notify(ckpt_status);
|
||||
|
||||
|
@ -26,7 +26,6 @@
|
||||
#include "orte/mca/ras/base/base.h"
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
#include "orte/mca/routed/routed.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
#include "orte/mca/sensor/sensor.h"
|
||||
#include "orte/util/session_dir.h"
|
||||
#include "orte/runtime/orte_quit.h"
|
||||
@ -356,7 +355,6 @@ static void check_all_complete(int fd, short args, void *cbdata)
|
||||
orte_std_cntr_t index;
|
||||
bool one_still_alive;
|
||||
orte_vpid_t lowest=0;
|
||||
char *msg;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_state_base_output,
|
||||
"%s state:hnp:check_job_complete on job %s",
|
||||
@ -559,18 +557,6 @@ static void check_all_complete(int fd, short args, void *cbdata)
|
||||
* wasn't already set by an error condition
|
||||
*/
|
||||
ORTE_UPDATE_EXIT_STATUS(0);
|
||||
/* provide a notifier message if that framework is active - ignored otherwise */
|
||||
if (NULL != (job = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, 1))) {
|
||||
if (0 == orte_exit_status) {
|
||||
asprintf(&msg, "Job %s complete", ORTE_JOBID_PRINT(job->jobid));
|
||||
orte_notifier.log(ORTE_NOTIFIER_INFO, 0, msg);
|
||||
} else {
|
||||
asprintf(&msg, "Job %s terminated abnormally", ORTE_JOBID_PRINT(job->jobid));
|
||||
orte_notifier.log(ORTE_NOTIFIER_ALERT, orte_exit_status, msg);
|
||||
}
|
||||
free(msg);
|
||||
/* this job object will be release during finalize */
|
||||
}
|
||||
|
||||
/* order daemon termination - this tells us to cleanup
|
||||
* our local procs as well as telling remote daemons
|
||||
|
@ -65,8 +65,6 @@
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/mca/state/base/base.h"
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "orte/mca/iof/iof.h"
|
||||
#include "orte/mca/iof/base/base.h"
|
||||
#include "orte/mca/oob/oob.h"
|
||||
@ -351,14 +349,6 @@ void orte_info_open_components(void)
|
||||
opal_pointer_array_add(&component_map, map);
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
if (ORTE_SUCCESS != orte_notifier_base_open()) {
|
||||
goto error;
|
||||
}
|
||||
map = OBJ_NEW(orte_info_component_map_t);
|
||||
map->type = strdup("notifier");
|
||||
map->components = &orte_notifier_base_components_available;
|
||||
opal_pointer_array_add(&component_map, map);
|
||||
|
||||
if (ORTE_SUCCESS != mca_oob_base_open()) {
|
||||
goto error;
|
||||
}
|
||||
@ -504,7 +494,6 @@ void orte_info_close_components()
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
(void) orte_snapc_base_close();
|
||||
#endif
|
||||
(void) orte_notifier_base_close();
|
||||
(void) orte_filem_base_close();
|
||||
(void) orte_iof_base_close();
|
||||
(void) orte_plm_base_close();
|
||||
|
@ -240,7 +240,6 @@ int main(int argc, char *argv[])
|
||||
opal_pointer_array_add(&mca_types, "ess");
|
||||
opal_pointer_array_add(&mca_types, "grpcomm");
|
||||
opal_pointer_array_add(&mca_types, "db");
|
||||
opal_pointer_array_add(&mca_types, "notifier");
|
||||
|
||||
/* Execute the desired action(s) */
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user