1
1

Allow the OOB to connect between all MPI applications during MPI_INIT

without also establishing MPI connectivity.

This commit was SVN r13593.
Этот коммит содержится в:
Brian Barrett 2007-02-09 20:11:40 +00:00
родитель 418f9ee5c8
Коммит 81472363ea
7 изменённых файлов: 104 добавлений и 9 удалений

Просмотреть файл

@ -110,6 +110,7 @@ extern "C" {
* be made if they will be made).
*/
int ompi_init_do_preconnect(void);
int ompi_init_do_oob_preconnect(void);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -661,6 +661,27 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
gettimeofday(&ompistart, NULL);
}
/* wire up the oob interface, if requested. Do this here because
it will go much faster before the event library is switched
into non-blocking mode */
if (ompi_mpi_preconnect_oob) {
if (OMPI_SUCCESS != (ret = ompi_init_do_oob_preconnect())) {
error = "ompi_mpi_do_preconnect_oob() failed";
goto error;
}
}
/* check for timing request - get stop time and report elapsed
time if so, then start the clock again */
if (timing) {
gettimeofday(&ompistop, NULL);
opal_output(0, "ompi_mpi_init[%ld]: time from stage 2 cast to complete oob wireup %ld usec",
(long)ORTE_PROC_MY_NAME->vpid,
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
(ompistop.tv_usec - ompistart.tv_usec)));
gettimeofday(&ompistart, NULL);
}
#if OMPI_ENABLE_PROGRESS_THREADS == 0
/* Start setting up the event engine for MPI operations. Don't
block in the event library, so that communications don't take
@ -737,10 +758,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
if (ompi_mpi_preconnect_all) {
if (OMPI_SUCCESS != (ret = ompi_init_do_preconnect())) {
error = "ompi_mpi_do_preconnect_all() failed";
/* This will loop back up above, but ret != OMPI_SUCCESS,
so we'll end up returning out of this function before
getting here (and therefore avoiding an infinite
loop) */
goto error;
}
}
@ -769,7 +786,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* check for timing request - get stop time and report elapsed time if so */
if (timing) {
gettimeofday(&ompistop, NULL);
opal_output(0, "ompi_mpi_init[%ld]: time from stage2 xcast to complete mpi_init %ld usec",
opal_output(0, "ompi_mpi_init[%ld]: time from oob wireup to complete mpi_init %ld usec",
(long)ORTE_PROC_MY_NAME->vpid,
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
(ompistop.tv_usec - ompistart.tv_usec)));

Просмотреть файл

@ -50,6 +50,7 @@ bool ompi_mpi_abort_print_stack = false;
int ompi_mpi_abort_delay = 0;
bool ompi_mpi_keep_peer_hostnames = true;
bool ompi_mpi_preconnect_all = false;
bool ompi_mpi_preconnect_oob = false;
bool ompi_mpi_leave_pinned = false;
bool ompi_mpi_leave_pinned_pipeline = false;
@ -190,6 +191,13 @@ int ompi_mpi_register_params(void)
ompi_mpi_preconnect_all = OPAL_INT_TO_BOOL(value);
mca_base_param_reg_int_name("mpi", "preconnect_oob",
"Whether to force MPI processes to fully wire-up the OOB system between MPI processes.",
false, false,
(int) ompi_mpi_preconnect_oob, &value);
ompi_mpi_preconnect_oob = OPAL_INT_TO_BOOL(value);
/* Leave pinned parameter */
mca_base_param_reg_int_name("mpi", "leave_pinned",

Просмотреть файл

@ -20,6 +20,8 @@
#include "ompi/communicator/communicator.h"
#include "ompi/request/request.h"
#include "ompi/runtime/mpiruntime.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
/*
* do zero byte IRECV / ISEND: upper half sends to lower half (i.e. do
@ -81,3 +83,64 @@ int ompi_init_do_preconnect(void)
return ret;
}
int ompi_init_do_oob_preconnect(void)
{
size_t world_size, i, next, prev, my_index;
ompi_proc_t **procs;
int ret;
struct iovec msg[1];
procs = ompi_proc_world(&world_size);
msg[0].iov_base = NULL;
msg[0].iov_len = 0;
if (world_size == 2) {
if (ompi_proc_local() == procs[0]) {
ret = orte_rml.send(&procs[1]->proc_name,
msg,
1,
ORTE_RML_TAG_WIREUP,
0);
if (ret < 0) return ret;
} else {
ret = orte_rml.recv(&procs[0]->proc_name,
msg,
1,
ORTE_RML_TAG_WIREUP,
0);
if (ret < 0) return ret;
}
} else if (world_size > 2) {
for (i = 0 ; i < world_size ; ++i) {
if (ompi_proc_local() == procs[i]) {
my_index = i;
break;
}
}
for (i = 1 ; i <= world_size / 2 ; ++i) {
next = (my_index + i) % world_size;
prev = (my_index - i + world_size) % world_size;
/* sends do not wait for a match */
ret = orte_rml.send(&procs[next]->proc_name,
msg,
1,
ORTE_RML_TAG_WIREUP,
0);
if (ret < 0) return ret;
ret = orte_rml.recv(&procs[prev]->proc_name,
msg,
1,
ORTE_RML_TAG_WIREUP,
0);
if (ret < 0) return ret;
}
}
return OMPI_SUCCESS;
}

Просмотреть файл

@ -107,6 +107,11 @@ OMPI_DECLSPEC extern bool ompi_mpi_paffinity_alone;
*/
OMPI_DECLSPEC extern bool ompi_mpi_preconnect_all;
/**
* should we wireup the oob completely during MPI_INIT?
*/
OMPI_DECLSPEC extern bool ompi_mpi_preconnect_oob;
/**
* Whether MPI_ABORT should print out an identifying message
* (e.g., hostname and PID) and loop waiting for a debugger to

Просмотреть файл

@ -229,7 +229,7 @@ int mca_oob_tcp_component_open(void)
"connect() timeout in seconds, before trying next interface",
false,
false,
600,
10,
&mca_oob_tcp_component.tcp_timeout);
@ -412,7 +412,7 @@ static int mca_oob_tcp_create_listen(void)
mca_oob_tcp_component.tcp_listen_port = inaddr.sin_port;
/* setup listen backlog to maximum allowed by kernel */
if(listen(mca_oob_tcp_component.tcp_listen_sd, SOMAXCONN) < 0) {
if(listen(mca_oob_tcp_component.tcp_listen_sd, 5) < 0) {
opal_output(0, "mca_oob_tcp_component_init: listen(): %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
@ -623,7 +623,7 @@ static int mca_oob_tcp_create_listen_thread(void)
mca_oob_tcp_component.tcp_listen_port = inaddr.sin_port;
/* setup listen backlog to maximum allowed by kernel */
if(listen(mca_oob_tcp_component.tcp_listen_sd, SOMAXCONN) < 0) {
if(listen(mca_oob_tcp_component.tcp_listen_sd, 5) < 0) {
opal_output(0, "mca_oob_tcp_component_init: listen() failed: %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;

Просмотреть файл

@ -54,6 +54,7 @@ typedef uint32_t orte_rml_tag_t;
#define ORTE_RML_TAG_BPROC 17
#define ORTE_RML_TAG_BPROC_ABORT 18
#define ORTE_RML_TAG_SM_BACK_FILE_CREATED 19
#define ORTE_RML_TAG_WIREUP 20
#define ORTE_RML_TAG_DYNAMIC 2000