2008-02-28 04:57:57 +03:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2011-06-24 00:38:02 +04:00
|
|
|
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
2008-02-28 04:57:57 +03:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
/** @file:
|
|
|
|
*
|
|
|
|
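* Declares the ORTE tool-communication utility functions (orte_util_comm_*): tool connection, event reporting, job/node/process info queries, and job spawn, job terminate and VM halt requests.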
|
|
|
|
*
|
|
|
|
* Notes: add limits.h, compute size of integer and other types via sizeof(type)*CHAR_BIT
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _ORTE_UTIL_COMM_H_
|
|
|
|
#define _ORTE_UTIL_COMM_H_
|
|
|
|
|
|
|
|
#include "orte_config.h"
|
|
|
|
#include "orte/types.h"
|
|
|
|
|
|
|
|
#include "orte/runtime/orte_globals.h"
|
|
|
|
|
|
|
|
BEGIN_C_DECLS
|
|
|
|
|
2009-09-09 09:28:45 +04:00
|
|
|
/**
 * Event code passed to orte_util_comm_report_event(); stored as an 8-bit
 * unsigned integer.
 *
 * ORTE_COMM_EVENT maps this type onto OPAL_UINT8.
 * NOTE(review): presumably this is the data-type tag used when packing or
 * unpacking an orte_comm_event_t into a buffer -- confirm against users.
 */
typedef uint8_t orte_comm_event_t;
|
|
|
|
#define ORTE_COMM_EVENT OPAL_UINT8
|
|
|
|
|
|
|
|
/*
 * Event identifiers. Each value occupies a distinct single bit
 * (0x01, 0x02, 0x04), so all of them fit in the 8-bit orte_comm_event_t.
 * NOTE(review): the one-bit-per-event encoding suggests the values may be
 * OR-ed together into a mask -- confirm against the code that reports and
 * consumes events.
 */
#define ORTE_COMM_EVENT_ALLOCATE 0x01
|
|
|
|
#define ORTE_COMM_EVENT_MAP 0x02
|
|
|
|
#define ORTE_COMM_EVENT_LAUNCH 0x04
|
|
|
|
|
|
|
|
/**
 * Connect to a tool identified by a URI string.
 *
 * NOTE(review): the prototype does not show what the URI must contain or
 * which side initiates the connection -- confirm against the implementation.
 *
 * @param uri  URI string. Declared non-const; verify whether the callee
 *             modifies it.
 * @return Integer status. Presumably an ORTE error code (ORTE_SUCCESS on
 *         success) -- TODO confirm.
 */
ORTE_DECLSPEC int orte_util_comm_connect_tool(char *uri);
|
|
|
|
|
|
|
|
/**
 * Report an event.
 *
 * NOTE(review): the recipient of the report is not visible from this header
 * (presumably a connected tool) -- confirm against the implementation.
 *
 * @param ev  Event code (see the ORTE_COMM_EVENT_* constants).
 * @return Integer status. Presumably an ORTE error code (ORTE_SUCCESS on
 *         success) -- TODO confirm.
 */
ORTE_DECLSPEC int orte_util_comm_report_event(orte_comm_event_t ev);
|
|
|
|
|
2008-02-28 04:57:57 +03:00
|
|
|
/**
 * Query job information.
 *
 * NOTE(review): everything below is inferred from the signature only;
 * confirm ownership and wildcard semantics against the implementation.
 *
 * @param hnp             Process name of the HNP; presumably the target of
 *                        the query.
 * @param job             Job id to query.
 * @param num_jobs        Presumably an output: number of entries stored in
 *                        *job_info_array.
 * @param job_info_array  Presumably an output: receives an array of
 *                        orte_job_t pointers. Who releases the array and its
 *                        elements is not visible here -- TODO confirm.
 * @return Integer status. Presumably an ORTE error code (ORTE_SUCCESS on
 *         success) -- TODO confirm.
 */
ORTE_DECLSPEC int orte_util_comm_query_job_info(const orte_process_name_t *hnp, orte_jobid_t job,
|
|
|
|
int *num_jobs, orte_job_t ***job_info_array);
|
|
|
|
|
2008-04-30 23:49:53 +04:00
|
|
|
/**
 * Query node information.
 *
 * NOTE(review): everything below is inferred from the signature only;
 * confirm ownership and NULL/wildcard handling against the implementation.
 *
 * @param hnp              Process name of the HNP; presumably the target of
 *                         the query.
 * @param node             Node selector string (presumably a node name).
 *                         Declared non-const; verify whether the callee
 *                         modifies it.
 * @param num_nodes        Presumably an output: number of entries stored in
 *                         *node_info_array.
 * @param node_info_array  Presumably an output: receives an array of
 *                         orte_node_t pointers. Who releases it is not
 *                         visible here -- TODO confirm.
 * @return Integer status. Presumably an ORTE error code (ORTE_SUCCESS on
 *         success) -- TODO confirm.
 */
ORTE_DECLSPEC int orte_util_comm_query_node_info(const orte_process_name_t *hnp, char *node,
|
2008-02-28 04:57:57 +03:00
|
|
|
int *num_nodes, orte_node_t ***node_info_array);
|
|
|
|
|
|
|
|
/**
 * Query process information.
 *
 * NOTE(review): everything below is inferred from the signature only;
 * confirm ownership and wildcard semantics against the implementation.
 *
 * @param hnp              Process name of the HNP; presumably the target of
 *                         the query.
 * @param job              Job id of the process(es) to query.
 * @param vpid             Vpid of the process to query within that job.
 * @param num_procs        Presumably an output: number of entries stored in
 *                         *proc_info_array.
 * @param proc_info_array  Presumably an output: receives an array of
 *                         orte_proc_t pointers. Who releases it is not
 *                         visible here -- TODO confirm.
 * @return Integer status. Presumably an ORTE error code (ORTE_SUCCESS on
 *         success) -- TODO confirm.
 */
ORTE_DECLSPEC int orte_util_comm_query_proc_info(const orte_process_name_t *hnp, orte_jobid_t job, orte_vpid_t vpid,
|
2011-08-27 02:16:14 +04:00
|
|
|
int *num_procs, orte_proc_t ***proc_info_array);
|
2008-02-28 04:57:57 +03:00
|
|
|
|
|
|
|
/**
 * Request that a job be spawned.
 *
 * @param hnp    Process name of the HNP; presumably the recipient of the
 *               request -- confirm against the implementation.
 * @param jdata  Job object describing the job to spawn. Passed non-const;
 *               NOTE(review): the callee may update it (e.g. with an assigned
 *               job id) -- verify.
 * @return Integer status. Presumably an ORTE error code (ORTE_SUCCESS on
 *         success) -- TODO confirm.
 */
ORTE_DECLSPEC int orte_util_comm_spawn_job(const orte_process_name_t *hnp, orte_job_t *jdata);
|
|
|
|
|
|
|
|
/**
 * Request termination of a job.
 *
 * @param hnp  Process name of the HNP; presumably the recipient of the
 *             request -- confirm against the implementation.
 * @param job  Job id of the job to terminate.
 * @return Integer status. Presumably an ORTE error code (ORTE_SUCCESS on
 *         success) -- TODO confirm.
 */
ORTE_DECLSPEC int orte_util_comm_terminate_job(const orte_process_name_t *hnp, orte_jobid_t job);
|
|
|
|
|
Afraid this has a couple of things mixed into the commit. Couldn't be helped - had missed one commit prior to running out the door on vacation.
Fix race conditions in abnormal terminations. We had done a first-cut at this in a prior commit. However, the window remained partially open due to the fact that the HNP has multiple paths leading to orte_finalize. Most of our frameworks don't care if they are finalized more than once, but one of them does, which meant we segfaulted if orte_finalize got called more than once. Besides, we really shouldn't be doing that anyway.
So we now introduce a set of atomic locks that prevent us from multiply calling abort, attempting to call orte_finalize, etc. My initial tests indicate this is working cleanly, but since it is a race condition issue, more testing will have to be done before we know for sure that this problem has been licked.
Also, some updates relevant to the tool comm library snuck in here. Since those also touched the orted code (as did the prior changes), I didn't want to attempt to separate them out - besides, they are coming in soon anyway. More on them later as that functionality approaches completion.
This commit was SVN r17843.
2008-03-17 20:58:59 +03:00
|
|
|
/**
 * Request that the virtual machine be halted.
 *
 * NOTE(review): what "halting the VM" tears down (daemons, running jobs) is
 * not visible from this header -- confirm against the implementation.
 *
 * @param hnp  Process name of the HNP; presumably the recipient of the
 *             request.
 * @return Integer status. Presumably an ORTE error code (ORTE_SUCCESS on
 *         success) -- TODO confirm.
 */
ORTE_DECLSPEC int orte_util_comm_halt_vm(const orte_process_name_t *hnp);
|
|
|
|
|
2008-02-28 04:57:57 +03:00
|
|
|
END_C_DECLS
|
|
|
|
#endif
|