1
1

Allow the OOB to connect between all MPI applications during MPI_INIT

without also establishing MPI connectivity. 

This commit was SVN r13595.
Этот коммит содержится в:
Brian Barrett 2007-02-09 20:17:37 +00:00
родитель 262cbbc5c9
Коммит 8b28e5b33d
6 изменённых файлов: 112 добавлений и 7 удалений

Просмотреть файл

@ -10,6 +10,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -110,6 +112,7 @@ extern "C" {
* be made if they will be made). * be made if they will be made).
*/ */
int ompi_init_do_preconnect(void); int ompi_init_do_preconnect(void);
/**
 * Wire up the OOB (RML) connections between the processes in the
 * world by exchanging zero-byte messages tagged ORTE_RML_TAG_WIREUP,
 * without establishing any MPI-level connectivity.  Called from
 * ompi_mpi_init() when ompi_mpi_preconnect_oob is set.
 *
 * @return OMPI_SUCCESS on success; otherwise the negative value
 *         returned by the failing RML send or receive.
 */
int ompi_init_do_oob_preconnect(void);
#if defined(c_plusplus) || defined(__cplusplus) #if defined(c_plusplus) || defined(__cplusplus)
} }

Просмотреть файл

@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved.
* *
@ -661,6 +661,27 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
gettimeofday(&ompistart, NULL); gettimeofday(&ompistart, NULL);
} }
/* wire up the oob interface, if requested. Do this here because
it will go much faster before the event library is switched
into non-blocking mode */
if (ompi_mpi_preconnect_oob) {
if (OMPI_SUCCESS != (ret = ompi_init_do_oob_preconnect())) {
error = "ompi_mpi_do_preconnect_oob() failed";
goto error;
}
}
/* check for timing request - get stop time and report elapsed
time if so, then start the clock again */
if (timing) {
gettimeofday(&ompistop, NULL);
opal_output(0, "ompi_mpi_init[%ld]: time from stage 2 cast to complete oob wireup %ld usec",
(long)ORTE_PROC_MY_NAME->vpid,
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
(ompistop.tv_usec - ompistart.tv_usec)));
gettimeofday(&ompistart, NULL);
}
#if OMPI_ENABLE_PROGRESS_THREADS == 0 #if OMPI_ENABLE_PROGRESS_THREADS == 0
/* Start setting up the event engine for MPI operations. Don't /* Start setting up the event engine for MPI operations. Don't
block in the event library, so that communications don't take block in the event library, so that communications don't take
@ -733,14 +754,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* At this point, we are fully configured and in MPI mode. Any /* At this point, we are fully configured and in MPI mode. Any
communication calls here will work exactly like they would in communication calls here will work exactly like they would in
the user's code. Setup the connections between procs and warm the user's code. Setup the connections between procs and warm
them up with simple sends, if requested*/ them up with simple sends, if requested */
if (ompi_mpi_preconnect_all) { if (ompi_mpi_preconnect_all) {
if (OMPI_SUCCESS != (ret = ompi_init_do_preconnect())) { if (OMPI_SUCCESS != (ret = ompi_init_do_preconnect())) {
error = "ompi_mpi_do_preconnect_all() failed"; error = "ompi_mpi_do_preconnect_all() failed";
/* This will loop back up above, but ret != OMPI_SUCCESS,
so we'll end up returning out of this function before
getting here (and therefore avoiding an infinite
loop) */
goto error; goto error;
} }
} }
@ -769,7 +786,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* check for timing request - get stop time and report elapsed time if so */ /* check for timing request - get stop time and report elapsed time if so */
if (timing) { if (timing) {
gettimeofday(&ompistop, NULL); gettimeofday(&ompistop, NULL);
opal_output(0, "ompi_mpi_init[%ld]: time from stage2 xcast to complete mpi_init %ld usec", opal_output(0, "ompi_mpi_init[%ld]: time from oob wireup to complete mpi_init %ld usec",
(long)ORTE_PROC_MY_NAME->vpid, (long)ORTE_PROC_MY_NAME->vpid,
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
(ompistop.tv_usec - ompistart.tv_usec))); (ompistop.tv_usec - ompistart.tv_usec)));

Просмотреть файл

@ -10,6 +10,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -50,6 +52,7 @@ bool ompi_mpi_abort_print_stack = false;
int ompi_mpi_abort_delay = 0; int ompi_mpi_abort_delay = 0;
bool ompi_mpi_keep_peer_hostnames = true; bool ompi_mpi_keep_peer_hostnames = true;
bool ompi_mpi_preconnect_all = false; bool ompi_mpi_preconnect_all = false;
/* Whether MPI_INIT should fully wire up the OOB system between MPI
   processes.  Off by default; overwritten in
   ompi_mpi_register_params() from the ("mpi", "preconnect_oob")
   parameter. */
bool ompi_mpi_preconnect_oob = false;
bool ompi_mpi_leave_pinned = false; bool ompi_mpi_leave_pinned = false;
bool ompi_mpi_leave_pinned_pipeline = false; bool ompi_mpi_leave_pinned_pipeline = false;
@ -190,6 +193,13 @@ int ompi_mpi_register_params(void)
ompi_mpi_preconnect_all = OPAL_INT_TO_BOOL(value); ompi_mpi_preconnect_all = OPAL_INT_TO_BOOL(value);
mca_base_param_reg_int_name("mpi", "preconnect_oob",
"Whether to force MPI processes to fully wire-up the OOB system between MPI processes.",
false, false,
(int) ompi_mpi_preconnect_oob, &value);
ompi_mpi_preconnect_oob = OPAL_INT_TO_BOOL(value);
/* Leave pinned parameter */ /* Leave pinned parameter */
mca_base_param_reg_int_name("mpi", "leave_pinned", mca_base_param_reg_int_name("mpi", "leave_pinned",

Просмотреть файл

@ -5,6 +5,8 @@
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -20,6 +22,8 @@
#include "ompi/communicator/communicator.h" #include "ompi/communicator/communicator.h"
#include "ompi/request/request.h" #include "ompi/request/request.h"
#include "ompi/runtime/mpiruntime.h" #include "ompi/runtime/mpiruntime.h"
#include <stdlib.h>
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
/* /*
* do zero byte IRECV / ISEND: upper half sends to lower half (i.e. do * do zero byte IRECV / ISEND: upper half sends to lower half (i.e. do
@ -80,4 +84,65 @@ int ompi_init_do_preconnect(void)
return ret; return ret;
} }
/*
 * Wire up the OOB (RML) layer between the processes returned by
 * ompi_proc_world() by exchanging zero-byte messages tagged
 * ORTE_RML_TAG_WIREUP.  Only orte_rml is used, so no MPI-level
 * connections are created.
 *
 *  - fewer than 2 processes: nothing to do.
 *  - exactly 2 processes: procs[0] sends a single message to
 *    procs[1], which receives it.
 *  - more than 2 processes: for every distance i in
 *    1 .. world_size / 2, send to the process i slots ahead of us in
 *    the world array and receive from the process i slots behind us
 *    (both modulo world_size).
 *
 * Returns OMPI_SUCCESS on success.  On failure a negative value is
 * returned: either the value handed back by the failing RML call, or
 * -1 if the world array is unusable / does not contain the local
 * process.  The caller only tests against OMPI_SUCCESS.
 */
int ompi_init_do_oob_preconnect(void)
{
    size_t world_size = 0, i, next, prev, my_index;
    ompi_proc_t **procs;
    int ret;
    struct iovec msg[1];

    procs = ompi_proc_world(&world_size);
    if (NULL == procs) {
        /* No array to walk.  With fewer than two processes there is
           nothing to wire up anyway (same result as before);
           otherwise fail instead of dereferencing NULL below. */
        return (world_size < 2) ? OMPI_SUCCESS : -1;
    }

    /* zero-byte payload: the message only exists to force the
       connection to be established */
    msg[0].iov_base = NULL;
    msg[0].iov_len = 0;

    if (world_size == 2) {
        if (ompi_proc_local() == procs[0]) {
            ret = orte_rml.send(&procs[1]->proc_name,
                                msg,
                                1,
                                ORTE_RML_TAG_WIREUP,
                                0);
            if (ret < 0) goto cleanup;
        } else {
            ret = orte_rml.recv(&procs[0]->proc_name,
                                msg,
                                1,
                                ORTE_RML_TAG_WIREUP,
                                0);
            if (ret < 0) goto cleanup;
        }
    } else if (world_size > 2) {
        /* locate ourselves in the world array; world_size is used as
           the "not found" sentinel since it is never a valid index */
        my_index = world_size;
        for (i = 0 ; i < world_size ; ++i) {
            if (ompi_proc_local() == procs[i]) {
                my_index = i;
                break;
            }
        }
        if (my_index == world_size) {
            /* previously my_index was read uninitialized here; fail
               with a negative code like the RML error paths do */
            ret = -1;
            goto cleanup;
        }

        /* when world_size is even, the last iteration has
           next == prev, i.e. we send to and receive from the same
           peer */
        for (i = 1 ; i <= world_size / 2 ; ++i) {
            next = (my_index + i) % world_size;
            prev = (my_index - i + world_size) % world_size;

            /* sends do not wait for a match */
            ret = orte_rml.send(&procs[next]->proc_name,
                                msg,
                                1,
                                ORTE_RML_TAG_WIREUP,
                                0);
            if (ret < 0) goto cleanup;

            ret = orte_rml.recv(&procs[prev]->proc_name,
                                msg,
                                1,
                                ORTE_RML_TAG_WIREUP,
                                0);
            if (ret < 0) goto cleanup;
        }
    }

    /* the RML calls may return non-negative values other than
       OMPI_SUCCESS, so normalize the success result */
    ret = OMPI_SUCCESS;

 cleanup:
    /* NOTE(review): assumes the array returned by ompi_proc_world()
       is heap-allocated and owned by the caller -- confirm against
       ompi/proc/proc.h.  The proc objects themselves are not
       released here, only the array of pointers. */
    free(procs);
    return ret;
}

Просмотреть файл

@ -9,6 +9,8 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -107,6 +109,11 @@ OMPI_DECLSPEC extern bool ompi_mpi_paffinity_alone;
*/ */
OMPI_DECLSPEC extern bool ompi_mpi_preconnect_all; OMPI_DECLSPEC extern bool ompi_mpi_preconnect_all;
/**
* Whether the OOB should be completely wired up between all MPI processes during MPI_INIT.
*/
OMPI_DECLSPEC extern bool ompi_mpi_preconnect_oob;
/** /**
* Whether MPI_ABORT should print out an identifying message * Whether MPI_ABORT should print out an identifying message
* (e.g., hostname and PID) and loop waiting for a debugger to * (e.g., hostname and PID) and loop waiting for a debugger to

Просмотреть файл

@ -9,6 +9,8 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -54,6 +56,7 @@ typedef uint32_t orte_rml_tag_t;
#define ORTE_RML_TAG_BPROC 17 #define ORTE_RML_TAG_BPROC 17
#define ORTE_RML_TAG_BPROC_ABORT 18 #define ORTE_RML_TAG_BPROC_ABORT 18
#define ORTE_RML_TAG_SM_BACK_FILE_CREATED 19 #define ORTE_RML_TAG_SM_BACK_FILE_CREATED 19
/* tag for the zero-byte messages exchanged by
   ompi_init_do_oob_preconnect() to wire up the OOB */
#define ORTE_RML_TAG_WIREUP 20
#define ORTE_RML_TAG_DYNAMIC 2000 #define ORTE_RML_TAG_DYNAMIC 2000