Allow the OOB to connect between all MPI applications during MPI_INIT
without also establishing MPI connectivity. This commit was SVN r13593.
Этот коммит содержится в:
родитель
418f9ee5c8
Коммит
81472363ea
@ -110,6 +110,7 @@ extern "C" {
|
||||
* be made if they will be made).
|
||||
*/
|
||||
int ompi_init_do_preconnect(void);
|
||||
int ompi_init_do_oob_preconnect(void);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
|
@ -661,6 +661,27 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
gettimeofday(&ompistart, NULL);
|
||||
}
|
||||
|
||||
/* wire up the oob interface, if requested. Do this here because
|
||||
it will go much faster before the event library is switched
|
||||
into non-blocking mode */
|
||||
if (ompi_mpi_preconnect_oob) {
|
||||
if (OMPI_SUCCESS != (ret = ompi_init_do_oob_preconnect())) {
|
||||
error = "ompi_mpi_do_preconnect_oob() failed";
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
/* check for timing request - get stop time and report elapsed
|
||||
time if so, then start the clock again */
|
||||
if (timing) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time from stage 2 cast to complete oob wireup %ld usec",
|
||||
(long)ORTE_PROC_MY_NAME->vpid,
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
gettimeofday(&ompistart, NULL);
|
||||
}
|
||||
|
||||
#if OMPI_ENABLE_PROGRESS_THREADS == 0
|
||||
/* Start setting up the event engine for MPI operations. Don't
|
||||
block in the event library, so that communications don't take
|
||||
@ -737,10 +758,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
if (ompi_mpi_preconnect_all) {
|
||||
if (OMPI_SUCCESS != (ret = ompi_init_do_preconnect())) {
|
||||
error = "ompi_mpi_do_preconnect_all() failed";
|
||||
/* This will loop back up above, but ret != OMPI_SUCCESS,
|
||||
so we'll end up returning out of this function before
|
||||
getting here (and therefore avoiding an infinite
|
||||
loop) */
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
@ -769,7 +786,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
/* check for timing request - get stop time and report elapsed time if so */
|
||||
if (timing) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time from stage2 xcast to complete mpi_init %ld usec",
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time from oob wireup to complete mpi_init %ld usec",
|
||||
(long)ORTE_PROC_MY_NAME->vpid,
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
|
@ -50,6 +50,7 @@ bool ompi_mpi_abort_print_stack = false;
|
||||
int ompi_mpi_abort_delay = 0;
|
||||
bool ompi_mpi_keep_peer_hostnames = true;
|
||||
bool ompi_mpi_preconnect_all = false;
|
||||
bool ompi_mpi_preconnect_oob = false;
|
||||
bool ompi_mpi_leave_pinned = false;
|
||||
bool ompi_mpi_leave_pinned_pipeline = false;
|
||||
|
||||
@ -190,6 +191,13 @@ int ompi_mpi_register_params(void)
|
||||
|
||||
ompi_mpi_preconnect_all = OPAL_INT_TO_BOOL(value);
|
||||
|
||||
mca_base_param_reg_int_name("mpi", "preconnect_oob",
|
||||
"Whether to force MPI processes to fully wire-up the OOB system between MPI processes.",
|
||||
false, false,
|
||||
(int) ompi_mpi_preconnect_oob, &value);
|
||||
|
||||
ompi_mpi_preconnect_oob = OPAL_INT_TO_BOOL(value);
|
||||
|
||||
/* Leave pinned parameter */
|
||||
|
||||
mca_base_param_reg_int_name("mpi", "leave_pinned",
|
||||
|
@ -20,6 +20,8 @@
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/runtime/mpiruntime.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
|
||||
/*
|
||||
* do zero byte IRECV / ISEND: upper half sends to lower half (i.e. do
|
||||
@ -81,3 +83,64 @@ int ompi_init_do_preconnect(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int ompi_init_do_oob_preconnect(void)
|
||||
{
|
||||
size_t world_size, i, next, prev, my_index;
|
||||
ompi_proc_t **procs;
|
||||
int ret;
|
||||
struct iovec msg[1];
|
||||
|
||||
procs = ompi_proc_world(&world_size);
|
||||
|
||||
msg[0].iov_base = NULL;
|
||||
msg[0].iov_len = 0;
|
||||
|
||||
if (world_size == 2) {
|
||||
if (ompi_proc_local() == procs[0]) {
|
||||
ret = orte_rml.send(&procs[1]->proc_name,
|
||||
msg,
|
||||
1,
|
||||
ORTE_RML_TAG_WIREUP,
|
||||
0);
|
||||
if (ret < 0) return ret;
|
||||
} else {
|
||||
ret = orte_rml.recv(&procs[0]->proc_name,
|
||||
msg,
|
||||
1,
|
||||
ORTE_RML_TAG_WIREUP,
|
||||
0);
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
} else if (world_size > 2) {
|
||||
for (i = 0 ; i < world_size ; ++i) {
|
||||
if (ompi_proc_local() == procs[i]) {
|
||||
my_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 1 ; i <= world_size / 2 ; ++i) {
|
||||
next = (my_index + i) % world_size;
|
||||
prev = (my_index - i + world_size) % world_size;
|
||||
|
||||
/* sends do not wait for a match */
|
||||
ret = orte_rml.send(&procs[next]->proc_name,
|
||||
msg,
|
||||
1,
|
||||
ORTE_RML_TAG_WIREUP,
|
||||
0);
|
||||
if (ret < 0) return ret;
|
||||
|
||||
ret = orte_rml.recv(&procs[prev]->proc_name,
|
||||
msg,
|
||||
1,
|
||||
ORTE_RML_TAG_WIREUP,
|
||||
0);
|
||||
if (ret < 0) return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -107,6 +107,11 @@ OMPI_DECLSPEC extern bool ompi_mpi_paffinity_alone;
|
||||
*/
|
||||
OMPI_DECLSPEC extern bool ompi_mpi_preconnect_all;
|
||||
|
||||
/**
|
||||
* should we wireup the oob completely during MPI_INIT?
|
||||
*/
|
||||
OMPI_DECLSPEC extern bool ompi_mpi_preconnect_oob;
|
||||
|
||||
/**
|
||||
* Whether MPI_ABORT should print out an identifying message
|
||||
* (e.g., hostname and PID) and loop waiting for a debugger to
|
||||
|
@ -229,7 +229,7 @@ int mca_oob_tcp_component_open(void)
|
||||
"connect() timeout in seconds, before trying next interface",
|
||||
false,
|
||||
false,
|
||||
600,
|
||||
10,
|
||||
&mca_oob_tcp_component.tcp_timeout);
|
||||
|
||||
|
||||
@ -412,7 +412,7 @@ static int mca_oob_tcp_create_listen(void)
|
||||
mca_oob_tcp_component.tcp_listen_port = inaddr.sin_port;
|
||||
|
||||
/* setup listen backlog to maximum allowed by kernel */
|
||||
if(listen(mca_oob_tcp_component.tcp_listen_sd, SOMAXCONN) < 0) {
|
||||
if(listen(mca_oob_tcp_component.tcp_listen_sd, 5) < 0) {
|
||||
opal_output(0, "mca_oob_tcp_component_init: listen(): %s (%d)",
|
||||
strerror(opal_socket_errno), opal_socket_errno);
|
||||
return ORTE_ERROR;
|
||||
@ -623,7 +623,7 @@ static int mca_oob_tcp_create_listen_thread(void)
|
||||
mca_oob_tcp_component.tcp_listen_port = inaddr.sin_port;
|
||||
|
||||
/* setup listen backlog to maximum allowed by kernel */
|
||||
if(listen(mca_oob_tcp_component.tcp_listen_sd, SOMAXCONN) < 0) {
|
||||
if(listen(mca_oob_tcp_component.tcp_listen_sd, 5) < 0) {
|
||||
opal_output(0, "mca_oob_tcp_component_init: listen() failed: %s (%d)",
|
||||
strerror(opal_socket_errno), opal_socket_errno);
|
||||
return ORTE_ERROR;
|
||||
|
@ -54,6 +54,7 @@ typedef uint32_t orte_rml_tag_t;
|
||||
#define ORTE_RML_TAG_BPROC 17
|
||||
#define ORTE_RML_TAG_BPROC_ABORT 18
|
||||
#define ORTE_RML_TAG_SM_BACK_FILE_CREATED 19
|
||||
#define ORTE_RML_TAG_WIREUP 20
|
||||
|
||||
#define ORTE_RML_TAG_DYNAMIC 2000
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user