1
1

Take the next step towards fully utilizing static ports for the daemons to eliminate the initial "phone home" to mpirun by modifying the orted termination procedure to eliminate the need for a full barrier-like operation. Instead, we add a "onesided" barrier to the grpcomm framework API that releases the orted once it has completed its own contribution to the barrier - i.e., the orteds now exit as the "ack" message rolls up towards mpirun instead of sending the "ack" directly to mpirun.

This causes the orteds in the routing tree to remain alive until all termination "acks" from orteds below them have passed through. Thus, if we use static ports, we no longer require a direct orted-to-mpirun connection.

Also modify the binomial routed module so it conforms to what all the other routed modules do and have all messages pass along the routing tree instead of short-circuiting between orteds. This further reduces the number of ports being opened on backend nodes.

This commit was SVN r21203.
Этот коммит содержится в:
Ralph Castain 2009-05-11 14:11:44 +00:00
родитель c6f0499720
Коммит c45ff0d59f
29 изменённых файлов: 382 добавлений и 222 удалений

Просмотреть файл

@ -57,6 +57,7 @@ ORTE_DECLSPEC extern int orte_ess_base_output;
ORTE_DECLSPEC extern opal_list_t orte_ess_base_components_available;
#if !ORTE_DISABLE_FULL_SUPPORT
/*

Просмотреть файл

@ -37,6 +37,7 @@
#include "orte/mca/rml/base/base.h"
#include "orte/mca/routed/base/base.h"
#include "orte/mca/routed/routed.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/grpcomm/base/base.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/plm/base/base.h"
@ -296,6 +297,9 @@ int orte_ess_base_orted_finalize(void)
{
opal_list_item_t *item;
/* ensure all the orteds depart together */
orte_grpcomm.onesided_barrier();
orte_notifier_base_close();
orte_cr_finalize();

Просмотреть файл

@ -192,6 +192,8 @@ static int rte_finalize(void)
/* if I am a daemon, finalize using the default procedure */
if (ORTE_PROC_IS_DAEMON) {
/* don't need to do the barrier */
orte_orted_exit_with_barrier = false;
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
ORTE_ERROR_LOG(ret);
}

Просмотреть файл

@ -74,7 +74,7 @@ int orte_ess_tool_component_query(mca_base_module_t **module, int *priority)
* precedence. This would happen, for example,
* if the tool is a distributed set of processes
*/
if (ORTE_PROC_IS_TOOL || ORTE_PROC_IS_TOOL_WNAME) {
if (ORTE_PROC_IS_TOOL) {
*priority = 10;
*module = (mca_base_module_t *)&orte_ess_tool_module;
return ORTE_SUCCESS;

Просмотреть файл

@ -50,6 +50,7 @@ static int xcast(orte_jobid_t job,
orte_rml_tag_t tag);
static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf);
static int barrier(void);
static int onesided_barrier(void);
static int modex(opal_list_t *procs);
/* Module def */
@ -60,6 +61,7 @@ orte_grpcomm_base_module_t orte_grpcomm_bad_module = {
allgather,
orte_grpcomm_base_allgather_list,
barrier,
onesided_barrier,
orte_grpcomm_base_set_proc_attr,
orte_grpcomm_base_get_proc_attr,
modex,
@ -278,6 +280,142 @@ static int barrier(void)
return ORTE_SUCCESS;
}
/* Number of one-sided barrier messages processed since the counter was
 * last reset.
 */
static int num_onesided_barrier_recvd;

/*
 * Event handler invoked for each one-sided barrier message that was
 * queued by the receive callback. The message carries no information we
 * need, so we only count it and drop our reference to the event object.
 */
static void process_onesided_barrier(int fd, short event, void *data)
{
    orte_message_event_t *msg_event = (orte_message_event_t*)data;

    /* one more contribution has arrived */
    num_onesided_barrier_recvd += 1;

    /* the message itself is no longer needed */
    OBJ_RELEASE(msg_event);
}
/*
 * Receive callback for ORTE_RML_TAG_ONESIDED_BARRIER messages.
 *
 * The message is not handled here; it is handed to
 * process_onesided_barrier via ORTE_MESSAGE_EVENT, and a fresh
 * non-persistent wildcard receive is posted for the next message.
 */
static void onesided_barrier_recv(int status, orte_process_name_t* sender,
                                  opal_buffer_t* buffer, orte_rml_tag_t tag,
                                  void* cbdata)
{
    int ret;

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                         "%s grpcomm:bad:receive got message from %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    /* Defer the real work: we must leave the recv before acting on the
     * message, since acting on it may involve more messaging. Queue an
     * event so it is processed as soon as we return.
     *
     * The macro makes a copy of the buffer, which the event handler
     * releases - the incoming buffer is NOT released here, although its
     * payload IS transferred to the message buffer for later processing.
     */
    ORTE_MESSAGE_EVENT(sender, buffer, tag, process_onesided_barrier);

    /* the recv is non-persistent, so post it again */
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                  ORTE_RML_TAG_ONESIDED_BARRIER,
                                  ORTE_RML_NON_PERSISTENT,
                                  onesided_barrier_recv,
                                  NULL);
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
    }
}
/* quick timeout loop: flag raised once the short delay timer expires */
static bool timer_fired;

/*
 * Timer callback - its only job is to raise timer_fired so the code
 * waiting on the flag can continue. None of the arguments are used.
 */
static void quicktime_cb(int fd, short event, void *cbdata)
{
    (void)fd;
    (void)event;
    (void)cbdata;

    timer_fired = true;
}
static int onesided_barrier(void)
{
int num_participating;
opal_list_t daemon_tree;
opal_buffer_t buf;
orte_process_name_t my_parent;
opal_event_t *quicktime=NULL;
struct timeval quicktimeval;
int rc;
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
"%s grpcomm:bad: onesided barrier called",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* if we are not to use the barrier, then just return */
if (!orte_orted_exit_with_barrier) {
if (ORTE_PROC_IS_HNP) {
/* if we are the HNP, we need to do a little delay to give
* the orteds a chance to exit before we leave
*/
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
"%s grpcomm:bad: onesided barrier adding delay timer",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
quicktimeval.tv_sec = 0;
quicktimeval.tv_usec = 100;
timer_fired = false;
ORTE_DETECT_TIMEOUT(&quicktime, orte_process_info.num_procs, 1000, 10000, quicktime_cb);
ORTE_PROGRESSED_WAIT(timer_fired, 0, 1);
}
return ORTE_SUCCESS;
}
/* initialize things */
num_onesided_barrier_recvd = 0;
num_participating = 0;
/* figure out how many participants we should be expecting */
OBJ_CONSTRUCT(&daemon_tree, opal_list_t);
my_parent.jobid = ORTE_PROC_MY_NAME->jobid;
my_parent.vpid = orte_routed.get_routing_tree(&daemon_tree);
num_participating = opal_list_get_size(&daemon_tree);
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
"%s grpcomm:bad: onesided barrier num_participating %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_participating));
/* set the recv */
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_ONESIDED_BARRIER,
ORTE_RML_NON_PERSISTENT,
onesided_barrier_recv,
NULL))) {
ORTE_ERROR_LOG(rc);
}
/* wait to recv them */
ORTE_PROGRESSED_WAIT(false, num_onesided_barrier_recvd, num_participating);
/* cancel the recv */
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ONESIDED_BARRIER);
/* if I am the HNP, then we are done */
if (ORTE_PROC_IS_HNP) {
return ORTE_SUCCESS;
}
/* send a zero-byte msg to my parent */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
/* send it */
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
"%s grpcomm:bad:onsided:barrier not the HNP - sending to parent %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&my_parent)));
if (0 > (rc = orte_rml.send_buffer(&my_parent, &buf, ORTE_RML_TAG_ONESIDED_BARRIER, 0))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
return rc;
}
OBJ_DESTRUCT(&buf);
return ORTE_SUCCESS;
}
static opal_buffer_t *allgather_buf;
static orte_std_cntr_t allgather_complete;

Просмотреть файл

@ -54,6 +54,7 @@ static int xcast(orte_jobid_t job,
orte_rml_tag_t tag);
static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf);
static int barrier(void);
static int onesided_barrier(void);
static int modex(opal_list_t *procs);
static int set_proc_attr(const char *attr_name, const void *data, size_t size);
static int get_proc_attr(const orte_process_name_t proc,
@ -68,6 +69,7 @@ orte_grpcomm_base_module_t orte_grpcomm_basic_module = {
allgather,
orte_grpcomm_base_allgather_list,
barrier,
onesided_barrier,
set_proc_attr,
get_proc_attr,
modex,
@ -357,6 +359,142 @@ static int barrier(void)
return ORTE_SUCCESS;
}
/* Number of one-sided barrier messages processed since the counter was
 * last reset.
 */
static int num_onesided_barrier_recvd;

/*
 * Event handler invoked for each one-sided barrier message that was
 * queued by the receive callback. The message carries no information we
 * need, so we only count it and drop our reference to the event object.
 */
static void process_onesided_barrier(int fd, short event, void *data)
{
    orte_message_event_t *msg_event = (orte_message_event_t*)data;

    /* one more contribution has arrived */
    num_onesided_barrier_recvd += 1;

    /* the message itself is no longer needed */
    OBJ_RELEASE(msg_event);
}
/*
 * Receive callback for ORTE_RML_TAG_ONESIDED_BARRIER messages.
 *
 * The message is not handled here; it is handed to
 * process_onesided_barrier via ORTE_MESSAGE_EVENT, and a fresh
 * non-persistent wildcard receive is posted for the next message.
 */
static void onesided_barrier_recv(int status, orte_process_name_t* sender,
                                  opal_buffer_t* buffer, orte_rml_tag_t tag,
                                  void* cbdata)
{
    int rc;

    /* report under this module's own name (the text previously claimed
     * to come from the "bad" module)
     */
    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
                         "%s grpcomm:basic:receive got message from %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    /* don't process this right away - we need to get out of the recv before
     * we process the message as it may ask us to do something that involves
     * more messaging! Instead, setup an event so that the message gets processed
     * as soon as we leave the recv.
     *
     * The macro makes a copy of the buffer, which the event handler
     * releases - the incoming buffer, however, is NOT released here,
     * although its payload IS transferred to the message buffer for later
     * processing
     */
    ORTE_MESSAGE_EVENT(sender, buffer, tag, process_onesided_barrier);

    /* reissue the recv - it is non-persistent */
    if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                                      ORTE_RML_TAG_ONESIDED_BARRIER,
                                                      ORTE_RML_NON_PERSISTENT,
                                                      onesided_barrier_recv,
                                                      NULL))) {
        ORTE_ERROR_LOG(rc);
    }
    return;
}
/* quick timeout loop: flag raised once the short delay timer expires */
static bool timer_fired;

/*
 * Timer callback - its only job is to raise timer_fired so the code
 * waiting on the flag can continue. None of the arguments are used.
 */
static void quicktime_cb(int fd, short event, void *cbdata)
{
    (void)fd;
    (void)event;
    (void)cbdata;

    timer_fired = true;
}
static int onesided_barrier(void)
{
int num_participating;
opal_list_t daemon_tree;
opal_buffer_t buf;
orte_process_name_t my_parent;
opal_event_t *quicktime=NULL;
struct timeval quicktimeval;
int rc;
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
"%s grpcomm:basic: onesided barrier called",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* if we are not to use the barrier, then just return */
if (!orte_orted_exit_with_barrier) {
if (ORTE_PROC_IS_HNP) {
/* if we are the HNP, we need to do a little delay to give
* the orteds a chance to exit before we leave
*/
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
"%s grpcomm:basic: onesided barrier adding delay timer",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
quicktimeval.tv_sec = 0;
quicktimeval.tv_usec = 100;
timer_fired = false;
ORTE_DETECT_TIMEOUT(&quicktime, orte_process_info.num_procs, 1000, 10000, quicktime_cb);
ORTE_PROGRESSED_WAIT(timer_fired, 0, 1);
}
return ORTE_SUCCESS;
}
/* initialize things */
num_onesided_barrier_recvd = 0;
num_participating = 0;
/* figure out how many participants we should be expecting */
OBJ_CONSTRUCT(&daemon_tree, opal_list_t);
my_parent.jobid = ORTE_PROC_MY_NAME->jobid;
my_parent.vpid = orte_routed.get_routing_tree(&daemon_tree);
num_participating = opal_list_get_size(&daemon_tree);
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
"%s grpcomm:basic: onesided barrier num_participating %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_participating));
/* set the recv */
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_ONESIDED_BARRIER,
ORTE_RML_NON_PERSISTENT,
onesided_barrier_recv,
NULL))) {
ORTE_ERROR_LOG(rc);
}
/* wait to recv them */
ORTE_PROGRESSED_WAIT(false, num_onesided_barrier_recvd, num_participating);
/* cancel the recv */
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ONESIDED_BARRIER);
/* if I am the HNP, then we are done */
if (ORTE_PROC_IS_HNP) {
return ORTE_SUCCESS;
}
/* send a zero-byte msg to my parent */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
/* send it */
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
"%s grpcomm:basic:onsided:barrier not the HNP - sending to parent %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&my_parent)));
if (0 > (rc = orte_rml.send_buffer(&my_parent, &buf, ORTE_RML_TAG_ONESIDED_BARRIER, 0))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
return rc;
}
OBJ_DESTRUCT(&buf);
return ORTE_SUCCESS;
}
static opal_buffer_t *allgather_buf;
static orte_std_cntr_t allgather_complete;

Просмотреть файл

@ -71,6 +71,7 @@ orte_grpcomm_base_module_t orte_grpcomm_cnos_module = {
allgather,
allgather_list,
orte_grpcomm_cnos_barrier,
orte_grpcomm_cnos_barrier,
set_proc_attr,
get_proc_attr,
modex,

Просмотреть файл

@ -71,7 +71,12 @@ typedef int (*orte_grpcomm_base_module_allgather_list_fn_t)(opal_list_t *names,
/* barrier function */
typedef int (*orte_grpcomm_base_module_barrier_fn_t)(void);
/* one-sided barrier function - process releases once its
* contribution is complete
*/
typedef int (*orte_grpcomm_base_module_onesided_barrier_fn_t)(void);
/** DATA EXCHANGE FUNCTIONS - SEE ompi/runtime/ompi_module_exchange.h FOR A DESCRIPTION
* OF HOW THIS ALL WORKS
*/
@ -103,6 +108,7 @@ struct orte_grpcomm_base_module_2_0_0_t {
orte_grpcomm_base_module_allgather_fn_t allgather;
orte_grpcomm_base_module_allgather_list_fn_t allgather_list;
orte_grpcomm_base_module_barrier_fn_t barrier;
orte_grpcomm_base_module_onesided_barrier_fn_t onesided_barrier;
/* modex functions */
orte_grpcomm_base_module_modex_set_proc_attr_fn_t set_proc_attr;
orte_grpcomm_base_module_modex_get_proc_attr_fn_t get_proc_attr;

Просмотреть файл

@ -67,6 +67,7 @@ orte_grpcomm_base_module_t orte_grpcomm_hier_module = {
allgather,
orte_grpcomm_base_allgather_list,
barrier,
NULL, /* onesided barrier only used by daemons */
set_proc_attr,
get_proc_attr,
modex,

Просмотреть файл

@ -48,8 +48,7 @@ typedef uint8_t orte_daemon_cmd_flag_t;
#define ORTE_DAEMON_ADD_LOCAL_PROCS (orte_daemon_cmd_flag_t) 4
#define ORTE_DAEMON_TREE_SPAWN (orte_daemon_cmd_flag_t) 5
#define ORTE_DAEMON_HEARTBEAT_CMD (orte_daemon_cmd_flag_t) 6
#define ORTE_DAEMON_EXIT_WITH_REPLY_CMD (orte_daemon_cmd_flag_t) 7
#define ORTE_DAEMON_EXIT_NO_REPLY_CMD (orte_daemon_cmd_flag_t) 8
#define ORTE_DAEMON_EXIT_CMD (orte_daemon_cmd_flag_t) 7
#define ORTE_DAEMON_PROCESS_AND_RELAY_CMD (orte_daemon_cmd_flag_t) 9
#define ORTE_DAEMON_MESSAGE_LOCAL_PROCS (orte_daemon_cmd_flag_t) 10
#define ORTE_DAEMON_NULL_CMD (orte_daemon_cmd_flag_t) 11

Просмотреть файл

@ -445,7 +445,7 @@ static int plm_alps_terminate_orteds(void)
orte_wait_cb_cancel(alps_pid);
/* tell them to die! */
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_WITH_REPLY_CMD))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
ORTE_ERROR_LOG(rc);
}

Просмотреть файл

@ -614,7 +614,7 @@ int plm_ccp_terminate_orteds()
int rc;
/* now tell them to die! */
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_WITH_REPLY_CMD))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
ORTE_ERROR_LOG(rc);
}

Просмотреть файл

@ -395,7 +395,7 @@ static int plm_lsf_terminate_orteds(void)
int rc;
/* tell them to die! */
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_WITH_REPLY_CMD))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
ORTE_ERROR_LOG(rc);
}

Просмотреть файл

@ -885,7 +885,7 @@ int orte_plm_process_terminate_orteds(void)
int rc;
/* now tell them to die! */
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_WITH_REPLY_CMD))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
ORTE_ERROR_LOG(rc);
}

Просмотреть файл

@ -1358,7 +1358,7 @@ int orte_plm_rsh_terminate_orteds(void)
/* now tell them to die - their termination "acks" roll up the
* routing tree through the onesided barrier, so we can know that
* they have exited
*/
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_WITH_REPLY_CMD))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
ORTE_ERROR_LOG(rc);
}

Просмотреть файл

@ -125,6 +125,9 @@ static int plm_slurm_init(void)
local_launch_available = true;
}
/* we don't need a barrier to exit */
orte_orted_exit_with_barrier = false;
return rc;
}
@ -475,7 +478,7 @@ static int plm_slurm_terminate_orteds(void)
/* tell them to die without sending a reply - we will rely on the
* waitpid to tell us when they have exited!
*/
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_NO_REPLY_CMD))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
ORTE_ERROR_LOG(rc);
}

Просмотреть файл

@ -963,7 +963,7 @@ int orte_plm_submit_terminate_orteds(void)
int rc;
/* now tell them to die! */
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_WITH_REPLY_CMD))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
ORTE_ERROR_LOG(rc);
}

Просмотреть файл

@ -482,7 +482,7 @@ int plm_tm_terminate_orteds(void)
int rc;
/* now tell them to die! */
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_WITH_REPLY_CMD))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
ORTE_ERROR_LOG(rc);
}

Просмотреть файл

@ -621,7 +621,7 @@ int plm_tmd_terminate_orteds(void)
aborted = false;
/* tell them to die! */
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_NO_REPLY_CMD))) {
if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD))) {
ORTE_ERROR_LOG(rc);
}

Просмотреть файл

@ -214,7 +214,7 @@ orte_plm_xgrid_terminate_orteds(void)
{
int rc;
rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_WITH_REPLY_CMD);
rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_CMD);
if (ORTE_SUCCESS != rc) {
rc = [mca_plm_xgrid_component.client terminateOrteds];
}

Просмотреть файл

@ -109,6 +109,10 @@ BEGIN_C_DECLS
/* profile data */
#define ORTE_RML_TAG_GRPCOMM_PROFILE 33
/* onesided barrier */
#define ORTE_RML_TAG_ONESIDED_BARRIER 34
#define ORTE_RML_TAG_MAX 100

Просмотреть файл

@ -221,8 +221,7 @@ static int update_route(orte_process_name_t *target,
/* if I am an application process, we don't update the route since
* we automatically route everything through the local daemon
*/
if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON &&
!ORTE_PROC_IS_TOOL) {
if (ORTE_PROC_IS_APP) {
return ORTE_SUCCESS;
}
@ -300,6 +299,8 @@ static int update_route(orte_process_name_t *target,
static orte_process_name_t get_route(orte_process_name_t *target)
{
orte_process_name_t *ret, daemon;
opal_list_item_t *item;
orte_routed_tree_t *child;
int rc;
if (target->jobid == ORTE_JOBID_INVALID ||
@ -315,12 +316,17 @@ static orte_process_name_t get_route(orte_process_name_t *target)
}
/* if I am an application process, always route via my local daemon */
if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON &&
!ORTE_PROC_IS_TOOL) {
if (ORTE_PROC_IS_APP) {
ret = ORTE_PROC_MY_DAEMON;
goto found;
}
/* if I am a tool, the route is direct */
if (ORTE_PROC_IS_TOOL) {
ret = target;
goto found;
}
/****** HNP AND DAEMONS ONLY ******/
/* if the job family is zero, then this is going to a local slave,
@ -355,8 +361,9 @@ static orte_process_name_t get_route(orte_process_name_t *target)
/* THIS CAME FROM OUR OWN JOB FAMILY... */
/* if this is going to the HNP, send direct */
if (ORTE_PROC_MY_HNP->jobid == target->jobid &&
/* if we are not using static ports and this is going to the HNP, send direct */
if (!orte_static_ports &&
ORTE_PROC_MY_HNP->jobid == target->jobid &&
ORTE_PROC_MY_HNP->vpid == target->vpid) {
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routing not enabled - going direct",
@ -372,15 +379,38 @@ static orte_process_name_t get_route(orte_process_name_t *target)
ret = ORTE_NAME_INVALID;
goto found;
}
/* if the daemon is me, then send direct to the target! */
if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
ret = target;
goto found;
} else {
/* otherwise, we send it directly to that daemon */
ret = &daemon;
/* search routing tree for next step to that daemon */
for (item = opal_list_get_first(&my_children);
item != opal_list_get_end(&my_children);
item = opal_list_get_next(item)) {
child = (orte_routed_tree_t*)item;
if (child->vpid == daemon.vpid) {
/* the child is hosting the proc - just send it there */
ret = &daemon;
goto found;
}
/* otherwise, see if the daemon we need is below the child */
if (opal_bitmap_is_set_bit(&child->relatives, daemon.vpid)) {
/* yep - we need to step through this child */
daemon.vpid = child->vpid;
ret = &daemon;
goto found;
}
}
}
/* if we get here, then the target daemon is not beneath
* any of our children, so we have to step up through our parent
*/
daemon.vpid = my_parent.vpid;
ret = &daemon;
found:
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routed_binomial_get(%s) --> %s",

Просмотреть файл

@ -205,8 +205,7 @@ static int update_route(orte_process_name_t *target,
/* if I am an application process, we don't update the route since
* we automatically route everything through the local daemon
*/
if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON &&
!ORTE_PROC_IS_TOOL) {
if (ORTE_PROC_IS_APP) {
return ORTE_SUCCESS;
}
@ -293,8 +292,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
}
/* if I am an application process, always route via my local daemon */
if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON &&
!ORTE_PROC_IS_TOOL) {
if (ORTE_PROC_IS_APP) {
ret = ORTE_PROC_MY_DAEMON;
goto found;
}
@ -376,7 +374,6 @@ static orte_process_name_t get_route(orte_process_name_t *target)
}
found:
OPAL_OUTPUT_VERBOSE((0, orte_routed_base_output,
"%s routed_linear_get(%s) --> %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

Просмотреть файл

@ -221,8 +221,7 @@ static int update_route(orte_process_name_t *target,
/* if I am an application process, we don't update the route since
* we automatically route everything through the local daemon
*/
if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON &&
!ORTE_PROC_IS_TOOL) {
if (ORTE_PROC_IS_APP) {
return ORTE_SUCCESS;
}
@ -317,8 +316,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
}
/* if I am an application process, always route via my local daemon */
if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON &&
!ORTE_PROC_IS_TOOL) {
if (ORTE_PROC_IS_APP) {
ret = ORTE_PROC_MY_DAEMON;
goto found;
}
@ -357,8 +355,9 @@ static orte_process_name_t get_route(orte_process_name_t *target)
/* THIS CAME FROM OUR OWN JOB FAMILY... */
/* if this is going to the HNP, send direct */
if (ORTE_PROC_MY_HNP->jobid == target->jobid &&
/* if we are not using static ports and this is going to the HNP, send direct */
if (!orte_static_ports &&
ORTE_PROC_MY_HNP->jobid == target->jobid &&
ORTE_PROC_MY_HNP->vpid == target->vpid) {
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routing not enabled - going direct",
@ -369,7 +368,6 @@ static orte_process_name_t get_route(orte_process_name_t *target)
daemon.jobid = ORTE_PROC_MY_NAME->jobid;
/* find out what daemon hosts this proc */
/* find out what daemon hosts this proc */
if (ORTE_VPID_INVALID == (daemon.vpid = orte_ess.proc_get_daemon(target))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
ret = ORTE_NAME_INVALID;

Просмотреть файл

@ -576,97 +576,20 @@ static int process_commands(orte_process_name_t* sender,
break;
/**** EXIT COMMAND ****/
case ORTE_DAEMON_EXIT_WITH_REPLY_CMD:
case ORTE_DAEMON_EXIT_CMD:
if (orte_debug_daemons_flag) {
opal_output(0, "%s orted_cmd: received exit",
opal_output(0, "%s orted_cmd: received exit cmd",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
}
/* disable routing - we need to do this
* because daemons exit in an uncoordinated fashion.
* Thus, our routes are being dismantled, so we can't
* trust that any given route still exists
*/
orte_routing_is_enabled = false;
/* if we are the HNP, kill our local procs and
* flag we are exited - but don't yet exit
*/
/* if we are the HNP, just kill our local procs */
if (ORTE_PROC_IS_HNP) {
orte_job_t *daemons;
orte_proc_t **procs;
/* if we are the HNP, ensure our local procs are terminated */
orte_odls.kill_local_procs(ORTE_JOBID_WILDCARD, false);
/* now lookup the daemon job object */
if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
procs = (orte_proc_t**)daemons->procs->addr;
/* declare us terminated so things can exit cleanly */
procs[0]->state = ORTE_PROC_STATE_TERMINATED;
daemons->num_terminated++;
/* need to check for job complete as otherwise this doesn't
* get triggered in single-daemon systems
*/
orte_plm_base_check_job_completed(daemons);
/* all done! */
return ORTE_SUCCESS;
}
/* if we are not the HNP, send a message to the HNP telling
* it we are leaving - and then trigger our exit
/* else we are a daemon, trigger our exit - we will kill our
* local procs on our way out
*/
{
opal_buffer_t ack;
orte_proc_state_t state=ORTE_PROC_STATE_TERMINATED;
orte_exit_code_t exit_code=0;
orte_plm_cmd_flag_t cmd = ORTE_PLM_UPDATE_PROC_STATE;
OBJ_CONSTRUCT(&ack, opal_buffer_t);
opal_dss.pack(&ack, &cmd, 1, ORTE_PLM_CMD);
opal_dss.pack(&ack, &(ORTE_PROC_MY_NAME->jobid), 1, ORTE_JOBID);
opal_dss.pack(&ack, &(ORTE_PROC_MY_NAME->vpid), 1, ORTE_VPID);
opal_dss.pack(&ack, &state, 1, ORTE_PROC_STATE);
opal_dss.pack(&ack, &exit_code, 1, ORTE_EXIT_CODE);
orte_rml.send_buffer(ORTE_PROC_MY_HNP, &ack, ORTE_RML_TAG_PLM, 0);
OBJ_DESTRUCT(&ack);
}
orte_trigger_event(&orte_exit);
return ORTE_SUCCESS;
break;
/**** EXIT_NO_REPLY COMMAND ****/
case ORTE_DAEMON_EXIT_NO_REPLY_CMD:
if (orte_debug_daemons_flag) {
opal_output(0, "%s orted_cmd: received exit_no_reply",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
}
/* disable routing - we need to do this
* because daemons exit in an uncoordinated fashion.
* Thus, our routes are being dismantled, so we can't
* trust that any given route still exists
*/
orte_routing_is_enabled = false;
/* if we are the HNP, kill our local procs and
* flag we are exited - but don't yet exit
*/
if (ORTE_PROC_IS_HNP) {
orte_job_t *daemons;
orte_proc_t **procs;
/* if we are the HNP, ensure our local procs are terminated */
orte_odls.kill_local_procs(ORTE_JOBID_WILDCARD, false);
/* now lookup the daemon job object */
if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
procs = (orte_proc_t**)daemons->procs->addr;
/* declare us terminated so things can exit cleanly */
procs[0]->state = ORTE_PROC_STATE_TERMINATED;
daemons->num_terminated++;
/* There is nothing more to do here - actual exit will be
* accomplished by the plm
*/
return ORTE_SUCCESS;
}
orte_trigger_event(&orte_exit);
return ORTE_SUCCESS;
break;

Просмотреть файл

@ -65,6 +65,7 @@
#include "orte/runtime/orte_locks.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/odls/odls.h"

Просмотреть файл

@ -157,7 +157,7 @@ main(int argc, char *argv[])
#endif
tmp_env_var = NULL; /* Silence compiler warning */
if (ORTE_SUCCESS != (ret = orte_init(ORTE_PROC_TOOL_WNAME))) {
if (ORTE_SUCCESS != (ret = orte_init(ORTE_PROC_TOOL))) {
return ret;
}

Просмотреть файл

@ -80,6 +80,7 @@
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/runtime/runtime.h"
#include "orte/runtime/orte_globals.h"
@ -357,8 +358,6 @@ static opal_cmd_line_init_t cmd_line_init[] = {
* Local functions
*/
static void job_completed(int trigpipe, short event, void *arg);
static void terminated(int trigpipe, short event, void *arg);
static void timeout_callback(int fd, short ign, void *arg);
static void abort_signal_callback(int fd, short flags, void *arg);
static void abort_exit_callback(int fd, short event, void *arg);
static void signal_forward_callback(int fd, short event, void *arg);
@ -568,7 +567,7 @@ int orterun(int argc, char *argv[])
* and before we define signal handlers since they will call the
* exit event trigger!
*/
if (ORTE_SUCCESS != (rc = orte_wait_event(&orteds_exit_event, &orteds_exit, "orted_exit", terminated))) {
if (ORTE_SUCCESS != (rc = orte_wait_event(&orteds_exit_event, &orteds_exit, "orted_exit", just_quit))) {
orte_show_help("help-orterun.txt", "orterun:event-def-failed", true,
orterun_basename, ORTE_ERROR_NAME(rc));
goto DONE;
@ -805,7 +804,7 @@ static void job_completed(int trigpipe, short event, void *arg)
if (ORTE_SUCCESS != (rc = orte_plm.terminate_orteds())) {
/* since we know that the sends didn't completely go out,
* we know that the prior event will never fire. Add a timeout so
* we know that the barrier will never complete. Add a timeout so
* that those daemons that can respond have a chance to do
* so
*/
@ -816,104 +815,25 @@ static void job_completed(int trigpipe, short event, void *arg)
}
ORTE_DETECT_TIMEOUT(&timeout_ev, daemons->num_procs,
orte_timeout_usec_per_proc,
orte_max_timeout, timeout_callback);
orte_max_timeout, just_quit);
}
#ifndef __WINDOWS__
/* now wait to hear it has been done */
opal_event_dispatch();
#else
/* We are using WT_EXECUTEINWAITTHREAD mode of threading pool,
the other callbacks won't be triggerred until this thread finishes,
so just return to main thread and process the rest events there. */
return;
#endif
/* if we cannot order the daemons to terminate, then
* all we can do is cleanly exit ourselves
*/
/* ensure all the orteds depart together */
orte_grpcomm.onesided_barrier();
DONE:
ORTE_UPDATE_EXIT_STATUS(rc);
just_quit(0, 0, NULL);
}
/*
 * Event callback run once the daemons have been told to exit (or the
 * wait for them timed out). It cancels the timeout timer, removes the
 * signal handlers, reports to the user which daemons (if any) are not
 * marked terminated, and finally hands off to just_quit(). The
 * trigpipe/event/arg parameters are not used.
 */
static void terminated(int trigpipe, short event, void *arg)
{
orte_job_t *daemons;
orte_proc_t **procs;
orte_vpid_t i;
/* clear the event timer */
/* NOTE(review): timeout_ev is freed but not reset to NULL here */
if (NULL != timeout_ev) {
opal_evtimer_del(timeout_ev);
free(timeout_ev);
}
/* only tear the handlers down once - signals_set is cleared below */
if (signals_set) {
/* Remove the TERM and INT signal handlers */
opal_signal_del(&term_handler);
opal_signal_del(&int_handler);
#ifndef __WINDOWS__
/** Remove the USR signal handlers */
opal_signal_del(&sigusr1_handler);
opal_signal_del(&sigusr2_handler);
/* the job-control handlers are only removed when forwarding is on */
if (orte_forward_job_control) {
opal_signal_del(&sigtstp_handler);
opal_signal_del(&sigcont_handler);
}
#endif /* __WINDOWS__ */
signals_set = false;
}
/* get the daemon job object */
if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
/* nothing more we can do - tell user something really messed
* up and exit
*/
orte_show_help("help-orterun.txt", "orterun:no-orted-object-exit",
true, orterun_basename);
goto finish;
}
/* did any daemons fail to respond? Remember we already
* set ourselves to terminated
*/
if (daemons->num_terminated != daemons->num_procs) {
/* alert user to that fact and which nodes didn't respond and
* print a warning that the user may still have some manual
* cleanup to do.
*/
orte_show_help("help-orterun.txt", "orterun:unclean-exit",
true, orterun_basename);
procs = (orte_proc_t**)daemons->procs->addr;
/* start at index 1: entry 0 is skipped (presumably this process
* itself - see the "already set ourselves" note above)
*/
for (i=1; i < daemons->num_procs; i++)
{
if (ORTE_PROC_STATE_TERMINATED != procs[i]->state) {
/* print out node name */
orte_node_t *node = procs[i]->node;
if (NULL != node && NULL != node->name) {
/* a daemon with no rml_uri gets the "did not report back"
* variant of the message
*/
if (NULL != procs[i]->rml_uri) {
fprintf(stderr, "\t%s\n", node->name);
} else {
fprintf(stderr, "\t%s - daemon did not report back when launched\n", node->name);
}
}
}
}
} else {
/* we cleaned up! let the user know */
if (!orterun_globals.quiet && orte_abnormal_term_ordered){
fprintf(stderr, "%s: clean termination accomplished\n\n", orterun_basename);
}
}
finish:
/* now clean ourselves up and exit */
just_quit(0, 0, NULL);
}
static void just_quit(int fd, short ign, void *arg)
{
/* if the orted exit event is set, delete it */
if (NULL != orteds_exit_event) {
opal_evtimer_del(orteds_exit_event);
free(orteds_exit_event);
}
if (signals_set) {
/* Remove the TERM and INT signal handlers */
opal_signal_del(&term_handler);
@ -1090,14 +1010,6 @@ static void dump_aborted_procs(void)
orte_show_help("help-orterun.txt", "orterun:proc-aborted-unknown", true, orterun_basename);
}
/*
 * Timer callback: fires the orteds_exit trigger. Going straight to the
 * trigger (which takes us to terminated) keeps us from looping back into
 * trying to kill things. None of the arguments are used.
 */
static void timeout_callback(int fd, short ign, void *arg)
{
    (void)fd;
    (void)ign;
    (void)arg;

    orte_trigger_event(&orteds_exit);
}
static void abort_exit_callback(int fd, short ign, void *arg)
{
int ret;

Просмотреть файл

@ -49,16 +49,18 @@ typedef uint32_t orte_proc_type_t;
#define ORTE_PROC_DAEMON 0x0002
#define ORTE_PROC_HNP 0x0004
#define ORTE_PROC_TOOL 0x0008
#define ORTE_PROC_TOOL_WNAME 0x0010
#define ORTE_PROC_NON_MPI 0x0010
#define ORTE_PROC_MPI 0x0020
#define ORTE_PROC_APP 0x0030
#define ORTE_PROC_CM 0x0040
#define ORTE_PROC_IS_SINGLETON (ORTE_PROC_SINGLETON & orte_process_info.proc_type)
#define ORTE_PROC_IS_DAEMON (ORTE_PROC_DAEMON & orte_process_info.proc_type)
#define ORTE_PROC_IS_HNP (ORTE_PROC_HNP & orte_process_info.proc_type)
#define ORTE_PROC_IS_TOOL (ORTE_PROC_TOOL & orte_process_info.proc_type)
#define ORTE_PROC_IS_TOOL_WNAME (ORTE_PROC_TOOL_WNAME & orte_process_info.proc_type)
#define ORTE_PROC_IS_NON_MPI (ORTE_PROC_NON_MPI & orte_process_info.proc_type)
#define ORTE_PROC_IS_MPI (ORTE_PROC_MPI & orte_process_info.proc_type)
#define ORTE_PROC_IS_APP (ORTE_PROC_APP & orte_process_info.proc_type)
#define ORTE_PROC_IS_CM (ORTE_PROC_CM & orte_process_info.proc_type)