While documenting the "preconnect" MCA params, we discovered that several of them no longer made sense. After chatting with Jeff, we agreed to the following:
1. Register "mpi_preconnect_all" as a deprecated synonym for "mpi_preconnect_mpi". 2. Remove "mpi_preconnect_oob" and "mpi_preconnect_oob_simultaneous", as these are no longer valid. 3. Remove the routed framework's "warmup_routes" API. With the removal of the direct routed component, this function at best only wasted communications. The daemon routes are completely "warmed up" during launch, so having MPI procs order the sending of additional messages is simply wasteful. 4. Remove the call to orte_routed.warmup_routes from MPI_Init. This was the only place it was used anyway. The FAQs will be updated to reflect this changed situation, and a CMR will be filed to move this to the 1.3 branch. This commit was SVN r19933.
Этот коммит содержится в:
родитель
cad49da72d
Коммит
25491628b8
@ -30,6 +30,9 @@
|
||||
#if HAVE_PTHREAD_H
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
@ -52,10 +55,6 @@
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
#include "orte/mca/routed/routed.h"
|
||||
#endif
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mpi/f77/constants.h"
|
||||
#include "ompi/runtime/mpiruntime.h"
|
||||
@ -346,16 +345,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
}
|
||||
orte_setup = true;
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
/* warmup the OOB routes. Do this here because
|
||||
it will go much faster before the event library is switched
|
||||
into non-blocking mode */
|
||||
if (OMPI_SUCCESS != (ret = orte_routed.warmup_routes())) {
|
||||
error = "orte_routed_warmup_routes() failed";
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* check for timing request - get stop time and report elapsed time if so */
|
||||
if (timing && 0 == ORTE_PROC_MY_NAME->vpid) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
|
@ -224,30 +224,16 @@ int ompi_mpi_register_params(void)
|
||||
ompi_mpi_abort_print_stack = false;
|
||||
#endif
|
||||
|
||||
mca_base_param_reg_int_name("mpi", "preconnect_all",
|
||||
"Whether to force MPI processes to create OOB "
|
||||
"and MPI connections with *all* peers during "
|
||||
"MPI_INIT (vs. making connections lazily -- "
|
||||
"upon the first MPI traffic between each "
|
||||
"process peer pair)",
|
||||
false, false, 0, NULL);
|
||||
|
||||
mca_base_param_reg_int_name("mpi", "preconnect_mpi",
|
||||
"Whether to force MPI processes to fully "
|
||||
"wire-up the MPI connections between MPI "
|
||||
"processes.",
|
||||
false, false, 0, NULL);
|
||||
|
||||
mca_base_param_reg_int_name("mpi", "preconnect_oob",
|
||||
"Whether to force MPI processes to fully "
|
||||
"wire-up the OOB system between MPI processes.",
|
||||
false, false, 0, NULL);
|
||||
|
||||
mca_base_param_reg_int_name("mpi", "preconnect_oob_simultaneous",
|
||||
"Number of simultaneous outstanding "
|
||||
"OOB connections to allow during preconnect.",
|
||||
false, false, 4, NULL);
|
||||
|
||||
value = mca_base_param_reg_int_name("mpi", "preconnect_mpi",
|
||||
"Whether to force MPI processes to fully "
|
||||
"wire-up the MPI connections between MPI "
|
||||
"processes during "
|
||||
"MPI_INIT (vs. making connections lazily -- "
|
||||
"upon the first MPI traffic between each "
|
||||
"process peer pair)",
|
||||
false, false, 0, NULL);
|
||||
mca_base_param_reg_syn_name(value, "mpi", "preconnect_all", true);
|
||||
|
||||
/* Leave pinned parameter */
|
||||
|
||||
mca_base_param_reg_int_name("mpi", "leave_pinned",
|
||||
|
@ -37,14 +37,9 @@ ompi_init_preconnect_mpi(void)
|
||||
param = mca_base_param_find("mpi", NULL, "preconnect_mpi");
|
||||
if (OMPI_ERROR == param) return OMPI_SUCCESS;
|
||||
ret = mca_base_param_lookup_int(param, &value);
|
||||
if (OMPI_SUCCESS != ret) return OMPI_SUCCESS;
|
||||
if (0 == value) {
|
||||
param = mca_base_param_find("mpi", NULL, "preconnect_all");
|
||||
if (OMPI_ERROR == param) return OMPI_SUCCESS;
|
||||
ret = mca_base_param_lookup_int(param, &value);
|
||||
if (OMPI_SUCCESS != ret) return OMPI_SUCCESS;
|
||||
if (OMPI_SUCCESS != ret || 0 == value) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
if (0 == value) return OMPI_SUCCESS;
|
||||
|
||||
inbuf[0] = outbuf[0] = '\0';
|
||||
|
||||
|
@ -47,7 +47,6 @@ static int update_routing_tree(void);
|
||||
static orte_vpid_t get_routing_tree(orte_jobid_t job, opal_list_t *children);
|
||||
static bool proc_is_below(orte_vpid_t root, orte_vpid_t target);
|
||||
static int get_wireup_info(opal_buffer_t *buf);
|
||||
static int warmup_routes(void);
|
||||
|
||||
#if OPAL_ENABLE_FT == 1
|
||||
static int binomial_ft_event(int state);
|
||||
@ -60,7 +59,6 @@ orte_routed_module_t orte_routed_binomial_module = {
|
||||
update_route,
|
||||
get_route,
|
||||
init_routes,
|
||||
warmup_routes,
|
||||
route_lost,
|
||||
route_is_defined,
|
||||
update_routing_tree,
|
||||
@ -765,24 +763,6 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
}
|
||||
}
|
||||
|
||||
static int warmup_routes(void)
|
||||
{
|
||||
opal_buffer_t buf;
|
||||
orte_daemon_cmd_flag_t cmd=ORTE_DAEMON_NULL_CMD;
|
||||
int rc;
|
||||
|
||||
/* send a NULL command to my parent */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
opal_dss.pack(&buf, &cmd, 1, ORTE_DAEMON_CMD);
|
||||
if (0 > (rc = orte_rml.send_buffer(&my_parent, &buf, ORTE_RML_TAG_DAEMON, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return rc;
|
||||
}
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int route_lost(const orte_process_name_t *route)
|
||||
{
|
||||
/* if we lose the connection to the lifeline and we are NOT already,
|
||||
|
@ -45,7 +45,6 @@ static int update_routing_tree(void);
|
||||
static orte_vpid_t get_routing_tree(orte_jobid_t job, opal_list_t *children);
|
||||
static bool proc_is_below(orte_vpid_t root, orte_vpid_t target);
|
||||
static int get_wireup_info(opal_buffer_t *buf);
|
||||
static int warmup_routes(void);
|
||||
|
||||
#if OPAL_ENABLE_FT == 1
|
||||
static int linear_ft_event(int state);
|
||||
@ -58,7 +57,6 @@ orte_routed_module_t orte_routed_linear_module = {
|
||||
update_route,
|
||||
get_route,
|
||||
init_routes,
|
||||
warmup_routes,
|
||||
route_lost,
|
||||
route_is_defined,
|
||||
update_routing_tree,
|
||||
@ -758,11 +756,6 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
}
|
||||
}
|
||||
|
||||
static int warmup_routes(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int route_lost(const orte_process_name_t *route)
|
||||
{
|
||||
/* if we lose the connection to the lifeline and we are NOT already,
|
||||
|
@ -48,7 +48,6 @@ static int update_routing_tree(void);
|
||||
static orte_vpid_t get_routing_tree(orte_jobid_t job, opal_list_t *children);
|
||||
static bool proc_is_below(orte_vpid_t root, orte_vpid_t target);
|
||||
static int get_wireup_info(opal_buffer_t *buf);
|
||||
static int warmup_routes(void);
|
||||
|
||||
#if OPAL_ENABLE_FT == 1
|
||||
static int radix_ft_event(int state);
|
||||
@ -61,7 +60,6 @@ orte_routed_module_t orte_routed_radix_module = {
|
||||
update_route,
|
||||
get_route,
|
||||
init_routes,
|
||||
warmup_routes,
|
||||
route_lost,
|
||||
route_is_defined,
|
||||
update_routing_tree,
|
||||
@ -792,24 +790,6 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
}
|
||||
}
|
||||
|
||||
static int warmup_routes(void)
|
||||
{
|
||||
opal_buffer_t buf;
|
||||
orte_daemon_cmd_flag_t cmd=ORTE_DAEMON_NULL_CMD;
|
||||
int rc;
|
||||
|
||||
/* send a NULL command to my parent */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
opal_dss.pack(&buf, &cmd, 1, ORTE_DAEMON_CMD);
|
||||
if (0 > (rc = orte_rml.send_buffer(&my_parent, &buf, ORTE_RML_TAG_DAEMON, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return rc;
|
||||
}
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int route_lost(const orte_process_name_t *route)
|
||||
{
|
||||
/* if we lose the connection to the lifeline and we are NOT already,
|
||||
|
@ -223,15 +223,6 @@ typedef orte_vpid_t (*orte_routed_module_get_routing_tree_fn_t)(orte_jobid_t job
|
||||
*/
|
||||
typedef bool (*orte_routed_module_proc_is_below_fn_t)(orte_vpid_t root, orte_vpid_t target);
|
||||
|
||||
/*
|
||||
* Warmup routes
|
||||
*
|
||||
* Preconnect the module's routes so that the sockets are created
|
||||
* and ready for messaging. Sends 0-byte messages to those
|
||||
* processes that are directly connected
|
||||
*/
|
||||
typedef int (*orte_routed_module_warmup_routes_fn_t)(void);
|
||||
|
||||
/**
|
||||
* Handle fault tolerance updates
|
||||
*
|
||||
@ -261,7 +252,6 @@ struct orte_routed_module_t {
|
||||
orte_routed_module_update_route_fn_t update_route;
|
||||
orte_routed_module_get_route_fn_t get_route;
|
||||
orte_routed_module_init_routes_fn_t init_routes;
|
||||
orte_routed_module_warmup_routes_fn_t warmup_routes;
|
||||
orte_routed_module_route_lost_fn_t route_lost;
|
||||
orte_routed_module_route_is_defined_fn_t route_is_defined;
|
||||
/* fns for daemons */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user