1
1

After extensive conversations about this...

- My original patch stands: MPI_FINALIZE directly invokes the
  attribute callbacks on MPI_COMM_SELF
- We added some user-level checks to ensure that they don't call
  MPI_FINALIZE twice (this isn't really required, but it will prevent
  whacky segv's -- they'll at least get a nice error message)
- Removed the attribute callbacks on MPI_COMM_SELF from
  ompi_mpi_comm_finalize (i.e., we just moved them from
  ompi_mpi_comm_finalize to ompi_mpi_finalize -- we just moved this
  process up earlier in the MPI_FINALIZE sequence of events)
- Because there were so many conversations about this, here's the
  rationale:
  - MPI-2:4.8 says that we have to MPI_COMM_FREE MPI_COMM_SELF so that
    the attribute callbacks are invoked.
  - After considerable discussion, we came to the conclusion that
    FREE'ing COMM_SELF is not the issue -- calling the callbacks is
    the issue.
  - So it is sufficent for MPI_FINALIZE to directly invoke these
    attribute callbacks
  - The attribute callbacks are *not* invoked on other communicators
    because said communicators are not MPI_COMM_FREE'ed

This commit was SVN r9628.
Этот коммит содержится в:
Jeff Squyres 2006-04-13 17:00:36 +00:00
родитель 480af1c150
Коммит 82d590629d
3 изменённых файлов: 51 добавлений и 15 удалений

Просмотреть файл

@ -188,25 +188,16 @@ int ompi_comm_finalize(void)
int max, i;
ompi_communicator_t *comm;
/* MPI-2 section 4.8: call the attribute
delete functions attached to MPI_COMM_SELF
and destroy comm_self before any other communicator */
comm = &ompi_mpi_comm_self;
if (NULL != comm->c_keyhash) {
ompi_attr_delete_all(COMM_ATTR, comm, comm->c_keyhash);
/* ignoring that the attribute delete functions might
return an errorcode != MPI_SUCCESS.
Hey, we are in finalize, can finalize fail ??? */
OBJ_RELEASE(comm->c_keyhash);
}
/* Shut down MPI_COMM_SELF */
OBJ_DESTRUCT( &ompi_mpi_comm_self );
/* disconnect all dynamic communicators */
ompi_comm_dyn_finalize();
/* Destroy all predefined communicators */
/* Shut down MPI_COMM_WORLD */
OBJ_DESTRUCT( &ompi_mpi_comm_world );
/* Shut down the parent communicator, if it exists */
if( ompi_mpi_comm_parent != &ompi_mpi_comm_null ) {
/* Note that we pass ompi_mpi_comm_parent here
(vs. &ompi_mpi_comm_parent) because it is of type
@ -217,6 +208,7 @@ int ompi_comm_finalize(void)
OBJ_DESTRUCT (ompi_mpi_comm_parent);
}
/* Shut down MPI_COMM_NULL */
OBJ_DESTRUCT( &ompi_mpi_comm_null );
/* Check whether we have some communicators left */

Просмотреть файл

@ -42,3 +42,10 @@ Typical causes for this problem include:
which case Open MPI will not bind any processes on that node
- A startup mechanism was used which did not tell Open MPI which
processors to bind processes to
[mpi_finalize:invoked_multiple_times]
The function MPI_FINALIZE was invoked multiple times in a single
process on host %s, PID %d.
This indicates an erroneous MPI program; MPI_FINALIZE is only allowed
to be invoked exactly once in a process.

Просмотреть файл

@ -19,10 +19,25 @@
#include "ompi_config.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif
#include "opal/event/event.h"
#include "opal/runtime/opal_progress.h"
#include "opal/mca/maffinity/base/base.h"
#include "opal/mca/base/base.h"
#include "opal/util/show_help.h"
#include "opal/sys/atomic.h"
#include "orte/util/proc_info.h"
#include "orte/mca/schema/schema.h"
@ -64,10 +79,30 @@
int ompi_mpi_finalize(void)
{
int ret;
static int32_t finalize_has_already_started = 0;
/* Delete attributes on MPI_COMM_SELF per MPI-2:4.8. Must be done
before anything else in FINALIZE, and ensure that MPI_FINALIZED
still returns false. */
/* Be a bit social if an erroneous program calls MPI_FINALIZE in
two different threads, otherwise we may deadlock in
ompi_comm_free() (or run into other nasty lions, tigers, or
bears) */
if (! opal_atomic_cmpset_32(&finalize_has_already_started, 0, 1)) {
/* Note that if we're already finalized, we cannot raise an
MPI exception. The best that we can do is write something
to stderr. */
char hostname[MAXHOSTNAMELEN];
pid_t pid = getpid();
gethostname(hostname, sizeof(hostname));
opal_show_help("help-mpi-runtime.txt",
"mpi_finalize:invoked_multiple_times",
true, hostname, pid);
return MPI_ERR_OTHER;
}
/* Per MPI-2:4.8, we have to free MPI_COMM_SELF before doing
anything else in MPI_FINALIZE (to include setting up such that
MPI_FINALIZED will return true). */
if (NULL != ompi_mpi_comm_self.c_keyhash) {
ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_self,
@ -76,6 +111,8 @@ int ompi_mpi_finalize(void)
ompi_mpi_comm_self.c_keyhash = NULL;
}
/* Proceed with MPI_FINALIZE */
ompi_mpi_finalized = true;
#if OMPI_ENABLE_PROGRESS_THREADS == 0
opal_progress_events(OPAL_EVLOOP_NONBLOCK);