Minimize the amount of topology info returned by the daemons. Most clusters, especially at scale, use the same node topology on every node, so there is no reason to return the topology from every daemon.
Borrow a page from the --hetero-apps option and let users indicate that the node topology differs by adding a --hetero-nodes option to mpirun. If the option is set, then every daemon returns topology info. If not set, then only daemon vpid=1 returns it. We always want one daemon to return the topology as the head node is often different from the compute nodes. Having one daemon return the compute node topology allows us to detect any such difference. All compute nodes are then set to the same topology. This commit was SVN r25408.
Этот коммит содержится в:
родитель
14966e0f8f
Коммит
d28dd55d33
@ -510,6 +510,8 @@ static int rte_init(void)
|
||||
#if OPAL_HAVE_HWLOC
|
||||
/* point our topology to the one detected locally */
|
||||
node->topology = opal_hwloc_topology;
|
||||
/* add it to the array of known topologies */
|
||||
opal_pointer_array_add(orte_node_topologies, opal_hwloc_topology);
|
||||
#endif
|
||||
|
||||
/* create and store a proc object for us */
|
||||
|
@ -31,7 +31,6 @@
|
||||
#endif /* HAVE_SYS_TIME_H */
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/opal_sos.h"
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
#include "opal/dss/dss.h"
|
||||
@ -549,15 +548,15 @@ static void process_orted_launch_report(int fd, short event, void *data)
|
||||
idx=1;
|
||||
node = daemon->node;
|
||||
if (OPAL_SUCCESS == opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO)) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s RECEIVED TOPOLOGY FROM NODE %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodename));
|
||||
/* do we already have this topology from some other node? */
|
||||
found = false;
|
||||
for (i=0; i < orte_node_topologies->size; i++) {
|
||||
if (NULL == (t = (hwloc_topology_t)opal_pointer_array_get_item(orte_node_topologies, i))) {
|
||||
continue;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s RECEIVED TOPOLOGY FROM NODE %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodename));
|
||||
if (OPAL_EQUAL == opal_dss.compare(topo, t, OPAL_HWLOC_TOPO)) {
|
||||
/* yes - just point to it */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
@ -629,7 +628,7 @@ static void orted_report_launch(int status, orte_process_name_t* sender,
|
||||
/* reissue the recv */
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ORTED_CALLBACK,
|
||||
ORTE_RML_NON_PERSISTENT, orted_report_launch, NULL);
|
||||
if (rc != ORTE_SUCCESS && OPAL_SOS_GET_ERROR_CODE(rc) != ORTE_ERR_NOT_IMPLEMENTED) {
|
||||
if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
orted_failed_launch = true;
|
||||
}
|
||||
@ -654,7 +653,7 @@ int orte_plm_base_daemon_callback(orte_std_cntr_t num_daemons)
|
||||
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ORTED_CALLBACK,
|
||||
ORTE_RML_NON_PERSISTENT, orted_report_launch, NULL);
|
||||
if (rc != ORTE_SUCCESS && OPAL_SOS_GET_ERROR_CODE(rc) != ORTE_ERR_NOT_IMPLEMENTED) {
|
||||
if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
@ -667,6 +666,36 @@ int orte_plm_base_daemon_callback(orte_std_cntr_t num_daemons)
|
||||
return rc;
|
||||
}
|
||||
|
||||
#if OPAL_HAVE_HWLOC
|
||||
{
|
||||
hwloc_topology_t t;
|
||||
orte_node_t *node;
|
||||
int i;
|
||||
|
||||
/* if the user didn't indicate that the node topologies were
|
||||
* different, then set the nodes to point to the topology
|
||||
* of the first node.
|
||||
*
|
||||
* NOTE: We do -not- point the nodes at the topology of
|
||||
* mpirun because many "homogeneous" clusters have a head
|
||||
* node that differs from all the compute nodes!
|
||||
*/
|
||||
if (!orte_hetero_nodes) {
|
||||
if (NULL == (t = (hwloc_topology_t)opal_pointer_array_get_item(orte_node_topologies, 1))) {
|
||||
/* got a problem */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
for (i=2; i < orte_node_pool->size; i++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
|
||||
continue;
|
||||
}
|
||||
node->topology = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* if we are timing, output the results */
|
||||
if (orte_timing) {
|
||||
int64_t sec, usec;
|
||||
@ -751,7 +780,10 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
|
||||
if (orte_report_bindings) {
|
||||
opal_argv_append(argc, argv, "--report-bindings");
|
||||
}
|
||||
|
||||
if (orte_hetero_nodes) {
|
||||
opal_argv_append(argc, argv, "--hetero-nodes");
|
||||
}
|
||||
|
||||
if ((int)ORTE_VPID_INVALID != orted_debug_failure) {
|
||||
opal_argv_append(argc, argv, "--debug-failure");
|
||||
asprintf(&param, "%d", orted_debug_failure);
|
||||
|
@ -195,6 +195,10 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
|
||||
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
||||
"Regular expression defining nodes in system" },
|
||||
|
||||
{ "orte", "hetero", "nodes", '\0', NULL, "hetero-nodes", 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Nodes in cluster may differ in topology, so send the topology back from each node [Default = false]" },
|
||||
|
||||
/* End of list */
|
||||
{ NULL, NULL, NULL, '\0', NULL, NULL, 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
|
||||
@ -643,7 +647,8 @@ int orte_daemon(int argc, char *argv[])
|
||||
|
||||
#if OPAL_HAVE_HWLOC
|
||||
/* add the local topology */
|
||||
if (NULL != opal_hwloc_topology) {
|
||||
if (NULL != opal_hwloc_topology &&
|
||||
(1 == ORTE_PROC_MY_NAME->vpid || orte_hetero_nodes)) {
|
||||
if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
}
|
||||
|
@ -73,6 +73,7 @@ int orted_debug_failure;
|
||||
int orted_debug_failure_delay;
|
||||
bool orte_homogeneous_nodes = false;
|
||||
bool orte_hetero_apps = false;
|
||||
bool orte_hetero_nodes = false;
|
||||
bool orte_never_launched = false;
|
||||
bool orte_devel_level_output = false;
|
||||
bool orte_display_topo_with_map = false;
|
||||
|
@ -601,6 +601,7 @@ ORTE_DECLSPEC extern int orted_debug_failure;
|
||||
ORTE_DECLSPEC extern int orted_debug_failure_delay;
|
||||
ORTE_DECLSPEC extern bool orte_homogeneous_nodes;
|
||||
ORTE_DECLSPEC extern bool orte_hetero_apps;
|
||||
ORTE_DECLSPEC extern bool orte_hetero_nodes;
|
||||
ORTE_DECLSPEC extern bool orte_never_launched;
|
||||
ORTE_DECLSPEC extern bool orte_devel_level_output;
|
||||
ORTE_DECLSPEC extern bool orte_display_topo_with_map;
|
||||
|
@ -315,6 +315,10 @@ int orte_register_params(void)
|
||||
"Indicates that multiple app_contexts are being provided that are a mix of 32/64 bit binaries (default: false)",
|
||||
false, false, (int) false, &value);
|
||||
orte_hetero_apps = OPAL_INT_TO_BOOL(value);
|
||||
mca_base_param_reg_int_name("orte", "hetero_nodes",
|
||||
"Nodes in cluster may differ in topology, so send the topology back from each node [Default = false]",
|
||||
false, false, (int) false, &value);
|
||||
orte_hetero_nodes = OPAL_INT_TO_BOOL(value);
|
||||
|
||||
/* allow specification of the launch agent */
|
||||
mca_base_param_reg_string_name("orte", "launch_agent",
|
||||
|
@ -143,7 +143,7 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
||||
"Return the exit status of the primary job only" },
|
||||
|
||||
/* hetero apps */
|
||||
{ "orte", "hetero", "apps", '\0', NULL, "hetero", 0,
|
||||
{ "orte", "hetero", "apps", '\0', NULL, "hetero-apps", 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Indicates that multiple app_contexts are being provided that are a mix of 32/64 bit binaries" },
|
||||
|
||||
@ -436,6 +436,10 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Launch daemons on all nodes at start to create a virtual machine [Default = false]" },
|
||||
|
||||
{ "orte", "hetero", "nodes", '\0', NULL, "hetero-nodes", 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Nodes in cluster may differ in topology, so send the topology back from each node [Default = false]" },
|
||||
|
||||
#if OPAL_ENABLE_CRDEBUG == 1
|
||||
{ "opal", "cr", "enable_crdebug", '\0', "crdebug", "crdebug", 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user