1
1

Allow the OOB to connect between all MPI applications during MPI_INIT

without also establishing MPI connectivity. 

This commit was SVN r13595.
Этот коммит содержится в:
Brian Barrett 2007-02-09 20:17:37 +00:00
родитель 262cbbc5c9
Коммит 8b28e5b33d
6 изменённых файлов: 112 добавлений и 7 удалений

Просмотреть файл

@ -10,6 +10,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -110,6 +112,7 @@ extern "C" {
* be made if they will be made). * be made if they will be made).
*/ */
int ompi_init_do_preconnect(void); int ompi_init_do_preconnect(void);
/**
 * Wire up the OOB (RML) channel between the processes returned by
 * ompi_proc_world() by exchanging zero-length messages tagged
 * ORTE_RML_TAG_WIREUP.  Called from ompi_mpi_init() when
 * ompi_mpi_preconnect_oob is set.
 *
 * @return OMPI_SUCCESS on success, otherwise the negative value
 *         returned by the failing orte_rml send/recv call.
 */
int ompi_init_do_oob_preconnect(void);
#if defined(c_plusplus) || defined(__cplusplus) #if defined(c_plusplus) || defined(__cplusplus)
} }

Просмотреть файл

@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved.
* *
@ -661,6 +661,27 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
gettimeofday(&ompistart, NULL); gettimeofday(&ompistart, NULL);
} }
/* wire up the oob interface, if requested. Do this here because
it will go much faster before the event library is switched
into non-blocking mode */
if (ompi_mpi_preconnect_oob) {
if (OMPI_SUCCESS != (ret = ompi_init_do_oob_preconnect())) {
error = "ompi_mpi_do_preconnect_oob() failed";
goto error;
}
}
/* check for timing request - get stop time and report elapsed
time if so, then start the clock again */
if (timing) {
gettimeofday(&ompistop, NULL);
opal_output(0, "ompi_mpi_init[%ld]: time from stage 2 cast to complete oob wireup %ld usec",
(long)ORTE_PROC_MY_NAME->vpid,
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
(ompistop.tv_usec - ompistart.tv_usec)));
gettimeofday(&ompistart, NULL);
}
#if OMPI_ENABLE_PROGRESS_THREADS == 0 #if OMPI_ENABLE_PROGRESS_THREADS == 0
/* Start setting up the event engine for MPI operations. Don't /* Start setting up the event engine for MPI operations. Don't
block in the event library, so that communications don't take block in the event library, so that communications don't take
@ -737,10 +758,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
if (ompi_mpi_preconnect_all) { if (ompi_mpi_preconnect_all) {
if (OMPI_SUCCESS != (ret = ompi_init_do_preconnect())) { if (OMPI_SUCCESS != (ret = ompi_init_do_preconnect())) {
error = "ompi_mpi_do_preconnect_all() failed"; error = "ompi_mpi_do_preconnect_all() failed";
/* This will loop back up above, but ret != OMPI_SUCCESS,
so we'll end up returning out of this function before
getting here (and therefore avoiding an infinite
loop) */
goto error; goto error;
} }
} }
@ -769,7 +786,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* check for timing request - get stop time and report elapsed time if so */ /* check for timing request - get stop time and report elapsed time if so */
if (timing) { if (timing) {
gettimeofday(&ompistop, NULL); gettimeofday(&ompistop, NULL);
opal_output(0, "ompi_mpi_init[%ld]: time from stage2 xcast to complete mpi_init %ld usec", opal_output(0, "ompi_mpi_init[%ld]: time from oob wireup to complete mpi_init %ld usec",
(long)ORTE_PROC_MY_NAME->vpid, (long)ORTE_PROC_MY_NAME->vpid,
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 + (long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
(ompistop.tv_usec - ompistart.tv_usec))); (ompistop.tv_usec - ompistart.tv_usec)));

Просмотреть файл

@ -10,6 +10,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -50,6 +52,7 @@ bool ompi_mpi_abort_print_stack = false;
int ompi_mpi_abort_delay = 0; int ompi_mpi_abort_delay = 0;
bool ompi_mpi_keep_peer_hostnames = true; bool ompi_mpi_keep_peer_hostnames = true;
bool ompi_mpi_preconnect_all = false; bool ompi_mpi_preconnect_all = false;
bool ompi_mpi_preconnect_oob = false;
bool ompi_mpi_leave_pinned = false; bool ompi_mpi_leave_pinned = false;
bool ompi_mpi_leave_pinned_pipeline = false; bool ompi_mpi_leave_pinned_pipeline = false;
@ -190,6 +193,13 @@ int ompi_mpi_register_params(void)
ompi_mpi_preconnect_all = OPAL_INT_TO_BOOL(value); ompi_mpi_preconnect_all = OPAL_INT_TO_BOOL(value);
mca_base_param_reg_int_name("mpi", "preconnect_oob",
"Whether to force MPI processes to fully wire-up the OOB system between MPI processes.",
false, false,
(int) ompi_mpi_preconnect_oob, &value);
ompi_mpi_preconnect_oob = OPAL_INT_TO_BOOL(value);
/* Leave pinned parameter */ /* Leave pinned parameter */
mca_base_param_reg_int_name("mpi", "leave_pinned", mca_base_param_reg_int_name("mpi", "leave_pinned",

Просмотреть файл

@ -5,6 +5,8 @@
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -20,6 +22,8 @@
#include "ompi/communicator/communicator.h" #include "ompi/communicator/communicator.h"
#include "ompi/request/request.h" #include "ompi/request/request.h"
#include "ompi/runtime/mpiruntime.h" #include "ompi/runtime/mpiruntime.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
/* /*
* do zero byte IRECV / ISEND: upper half sends to lower half (i.e. do * do zero byte IRECV / ISEND: upper half sends to lower half (i.e. do
@ -81,3 +85,64 @@ int ompi_init_do_preconnect(void)
return ret; return ret;
} }
/**
 * Wire up the OOB (RML) channel between the processes returned by
 * ompi_proc_world() by exchanging zero-length messages tagged
 * ORTE_RML_TAG_WIREUP.
 *
 * - Fewer than two processes: nothing is sent.
 * - Exactly two processes: the first process in the list sends one
 *   message to the second, which receives it.
 * - More than two processes: for every offset 1..world_size/2 each
 *   process sends to the peer that many slots ahead of it in the list
 *   and receives from the peer that many slots behind it (both with
 *   wrap-around).
 *
 * @return OMPI_SUCCESS on success, otherwise the negative value
 *         returned by the failing orte_rml send/recv call.
 */
int ompi_init_do_oob_preconnect(void)
{
    size_t world_size, i, next, prev;
    /* Start from slot 0 so the ring arithmetic below never reads an
       indeterminate value, even if the local process is unexpectedly
       missing from the list returned by ompi_proc_world(). */
    size_t my_index = 0;
    ompi_proc_t **procs;
    int ret;
    struct iovec msg[1];

    /* NOTE(review): ompi_proc_world() presumably returns a heap
       allocated array that the caller owns, and may return NULL on
       failure -- confirm against its documentation and add the
       matching NULL check / release here. */
    procs = ompi_proc_world(&world_size);

    /* zero-length payload: only the connection matters, not the data */
    msg[0].iov_base = NULL;
    msg[0].iov_len = 0;

    if (world_size == 2) {
        /* one message in one direction is enough for a single pair */
        if (ompi_proc_local() == procs[0]) {
            ret = orte_rml.send(&procs[1]->proc_name,
                                msg,
                                1,
                                ORTE_RML_TAG_WIREUP,
                                0);
            if (ret < 0) return ret;
        } else {
            ret = orte_rml.recv(&procs[0]->proc_name,
                                msg,
                                1,
                                ORTE_RML_TAG_WIREUP,
                                0);
            if (ret < 0) return ret;
        }
    } else if (world_size > 2) {
        ompi_proc_t *self = ompi_proc_local();

        /* locate our own slot in the world list */
        for (i = 0 ; i < world_size ; ++i) {
            if (self == procs[i]) {
                my_index = i;
                break;
            }
        }

        for (i = 1 ; i <= world_size / 2 ; ++i) {
            next = (my_index + i) % world_size;
            /* add world_size before the modulo so the unsigned
               subtraction cannot wrap below zero */
            prev = (my_index - i + world_size) % world_size;

            /* sends do not wait for a match */
            ret = orte_rml.send(&procs[next]->proc_name,
                                msg,
                                1,
                                ORTE_RML_TAG_WIREUP,
                                0);
            if (ret < 0) return ret;

            ret = orte_rml.recv(&procs[prev]->proc_name,
                                msg,
                                1,
                                ORTE_RML_TAG_WIREUP,
                                0);
            if (ret < 0) return ret;
        }
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -9,6 +9,8 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -107,6 +109,11 @@ OMPI_DECLSPEC extern bool ompi_mpi_paffinity_alone;
*/ */
OMPI_DECLSPEC extern bool ompi_mpi_preconnect_all; OMPI_DECLSPEC extern bool ompi_mpi_preconnect_all;
/**
 * Whether to wire up the OOB completely during MPI_INIT.  When true,
 * ompi_mpi_init() calls ompi_init_do_oob_preconnect().  Defaults to
 * false; the value is taken from the MCA parameter registered under
 * "mpi" / "preconnect_oob".
 */
OMPI_DECLSPEC extern bool ompi_mpi_preconnect_oob;
/** /**
* Whether MPI_ABORT should print out an identifying message * Whether MPI_ABORT should print out an identifying message
* (e.g., hostname and PID) and loop waiting for a debugger to * (e.g., hostname and PID) and loop waiting for a debugger to

Просмотреть файл

@ -9,6 +9,8 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -54,6 +56,7 @@ typedef uint32_t orte_rml_tag_t;
#define ORTE_RML_TAG_BPROC 17 #define ORTE_RML_TAG_BPROC 17
#define ORTE_RML_TAG_BPROC_ABORT 18 #define ORTE_RML_TAG_BPROC_ABORT 18
#define ORTE_RML_TAG_SM_BACK_FILE_CREATED 19 #define ORTE_RML_TAG_SM_BACK_FILE_CREATED 19
#define ORTE_RML_TAG_WIREUP 20
#define ORTE_RML_TAG_DYNAMIC 2000 #define ORTE_RML_TAG_DYNAMIC 2000