After extensive conversations about this...
- My original patch stands: MPI_FINALIZE directly invokes the attribute callbacks on MPI_COMM_SELF.
- We added some user-level checks to ensure that users don't call MPI_FINALIZE twice (this isn't strictly required, but it will prevent wacky segv's -- they'll at least get a nice error message).
- Removed the attribute callbacks on MPI_COMM_SELF from ompi_comm_finalize (i.e., we just moved them from ompi_comm_finalize to ompi_mpi_finalize -- this step now happens earlier in the MPI_FINALIZE sequence of events).
- Because there were so many conversations about this, here's the rationale:
  - MPI-2:4.8 says that we have to MPI_COMM_FREE MPI_COMM_SELF so that the attribute callbacks are invoked.
  - After considerable discussion, we came to the conclusion that FREE'ing COMM_SELF is not the issue -- calling the callbacks is the issue.
  - So it is sufficient for MPI_FINALIZE to directly invoke these attribute callbacks.
  - The attribute callbacks are *not* invoked on other communicators because said communicators are not MPI_COMM_FREE'ed.

This commit was SVN r9628.
Этот коммит содержится в:
родитель
480af1c150
Коммит
82d590629d
@ -188,25 +188,16 @@ int ompi_comm_finalize(void)
|
||||
int max, i;
|
||||
ompi_communicator_t *comm;
|
||||
|
||||
/* MPI-2 section 4.8: call the attribute
|
||||
delete functions attached to MPI_COMM_SELF
|
||||
and destroy comm_self before any other communicator */
|
||||
comm = &ompi_mpi_comm_self;
|
||||
if (NULL != comm->c_keyhash) {
|
||||
ompi_attr_delete_all(COMM_ATTR, comm, comm->c_keyhash);
|
||||
/* ignoring that the attribute delete functions might
|
||||
return an errorcode != MPI_SUCCESS.
|
||||
Hey, we are in finalize, can finalize fail ??? */
|
||||
OBJ_RELEASE(comm->c_keyhash);
|
||||
}
|
||||
/* Shut down MPI_COMM_SELF */
|
||||
OBJ_DESTRUCT( &ompi_mpi_comm_self );
|
||||
|
||||
/* disconnect all dynamic communicators */
|
||||
ompi_comm_dyn_finalize();
|
||||
|
||||
/* Destroy all predefined communicators */
|
||||
/* Shut down MPI_COMM_WORLD */
|
||||
OBJ_DESTRUCT( &ompi_mpi_comm_world );
|
||||
|
||||
/* Shut down the parent communicator, if it exists */
|
||||
if( ompi_mpi_comm_parent != &ompi_mpi_comm_null ) {
|
||||
/* Note that we pass ompi_mpi_comm_parent here
|
||||
(vs. &ompi_mpi_comm_parent) because it is of type
|
||||
@ -217,6 +208,7 @@ int ompi_comm_finalize(void)
|
||||
OBJ_DESTRUCT (ompi_mpi_comm_parent);
|
||||
}
|
||||
|
||||
/* Shut down MPI_COMM_NULL */
|
||||
OBJ_DESTRUCT( &ompi_mpi_comm_null );
|
||||
|
||||
/* Check whether we have some communicators left */
|
||||
|
@ -42,3 +42,10 @@ Typical causes for this problem include:
|
||||
which case Open MPI will not bind any processes on that node
|
||||
- A startup mechanism was used which did not tell Open MPI which
|
||||
processors to bind processes to
|
||||
[mpi_finalize:invoked_multiple_times]
|
||||
The function MPI_FINALIZE was invoked multiple times in a single
|
||||
process on host %s, PID %d.
|
||||
|
||||
This indicates an erroneous MPI program; MPI_FINALIZE is only allowed
|
||||
to be invoked exactly once in a process.
|
||||
|
||||
|
@ -19,10 +19,25 @@
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_PARAM_H
|
||||
#include <sys/param.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETDB_H
|
||||
#include <netdb.h>
|
||||
#endif
|
||||
|
||||
#include "opal/event/event.h"
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
#include "opal/mca/maffinity/base/base.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/sys/atomic.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/mca/schema/schema.h"
|
||||
@ -64,10 +79,30 @@
|
||||
int ompi_mpi_finalize(void)
|
||||
{
|
||||
int ret;
|
||||
static int32_t finalize_has_already_started = 0;
|
||||
|
||||
/* Delete attributes on MPI_COMM_SELF per MPI-2:4.8. Must be done
|
||||
before anything else in FINALIZE, and ensure that MPI_FINALIZED
|
||||
still returns false. */
|
||||
/* Be a bit social if an erroneous program calls MPI_FINALIZE in
|
||||
two different threads, otherwise we may deadlock in
|
||||
ompi_comm_free() (or run into other nasty lions, tigers, or
|
||||
bears) */
|
||||
|
||||
if (! opal_atomic_cmpset_32(&finalize_has_already_started, 0, 1)) {
|
||||
/* Note that if we're already finalized, we cannot raise an
|
||||
MPI exception. The best that we can do is write something
|
||||
to stderr. */
|
||||
char hostname[MAXHOSTNAMELEN];
|
||||
pid_t pid = getpid();
|
||||
gethostname(hostname, sizeof(hostname));
|
||||
|
||||
opal_show_help("help-mpi-runtime.txt",
|
||||
"mpi_finalize:invoked_multiple_times",
|
||||
true, hostname, pid);
|
||||
return MPI_ERR_OTHER;
|
||||
}
|
||||
|
||||
/* Per MPI-2:4.8, we have to free MPI_COMM_SELF before doing
|
||||
anything else in MPI_FINALIZE (to include setting up such that
|
||||
MPI_FINALIZED will return true). */
|
||||
|
||||
if (NULL != ompi_mpi_comm_self.c_keyhash) {
|
||||
ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_self,
|
||||
@ -76,6 +111,8 @@ int ompi_mpi_finalize(void)
|
||||
ompi_mpi_comm_self.c_keyhash = NULL;
|
||||
}
|
||||
|
||||
/* Proceed with MPI_FINALIZE */
|
||||
|
||||
ompi_mpi_finalized = true;
|
||||
#if OMPI_ENABLE_PROGRESS_THREADS == 0
|
||||
opal_progress_events(OPAL_EVLOOP_NONBLOCK);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user