1
1

First fix for the proc reference-count problem. It should now work for static scenarios.

I am aware of an additional bug (which I haven't resolved yet), but it should not
prevent any application from finishing correctly. Fix for the dynamic scenarios to follow soon.

This commit was SVN r3679.
Этот коммит содержится в:
Edgar Gabriel 2004-12-02 13:28:10 +00:00
родитель e03b9292b5
Коммит a53e4ae2eb
9 изменённых файлов: 99 добавлений и 22 удалений

Просмотреть файл

@ -261,8 +261,10 @@ ompi_process_name_t *ompi_comm_get_rport (ompi_process_name_t *port, int send_fi
rport = &(rproc->proc_name);
}
if(isnew)
if (isnew) {
mca_pml.pml_add_procs(&rproc, 1);
}
return rport;
}
@ -427,8 +429,6 @@ int ompi_comm_start_processes (char *command, char **argv, int maxprocs,
int ompi_comm_dyn_init (void)
{
uint32_t jobid;
size_t size;
ompi_proc_t **myproc=NULL;
char *envvarname=NULL, *port_name=NULL;
char *oob_port=NULL;
int tag, root=0, send_first=1;
@ -438,8 +438,13 @@ int ompi_comm_dyn_init (void)
ompi_errhandler_t *errhandler = NULL;
/* get jobid */
myproc = ompi_proc_self(&size);
jobid = ompi_name_server.get_jobid(&(myproc[0]->proc_name));
/* JMS: Previous was using ompi_proc_self() here, which
incremented the refcount. That would be fine, but we would
have to OBJ_RELEASE it as well. The global
ompi_proc_local_proc seemed to have been created for exactly
this kind of purpose, so I took the liberty of using it. */
jobid = ompi_name_server.get_jobid(&(ompi_proc_local_proc->proc_name));
/* check for appropriate env variable */
asprintf(&envvarname, "OMPI_PARENT_PORT_%u", jobid);

Просмотреть файл

@ -26,6 +26,7 @@
#include "mca/ns/base/base.h"
#include "mpi/runtime/params.h"
#include "communicator/communicator.h"
#include "group/group.h"
#include "attribute/attribute.h"
/*
@ -71,6 +72,7 @@ int ompi_comm_init(void)
group->grp_proc_count = size;
group->grp_flags |= OMPI_GROUP_INTRINSIC;
ompi_set_group_rank(group, ompi_proc_local());
ompi_group_increment_proc_count (group);
OBJ_RETAIN(group); /* bump reference count for remote reference */
ompi_mpi_comm_world.c_contextid = 0;
@ -299,10 +301,13 @@ static void ompi_comm_destruct(ompi_communicator_t* comm)
/* Release the collective module */
mca_coll_base_comm_unselect(comm);
if ( MPI_COMM_NULL != comm ) {
mca_coll_base_comm_unselect(comm);
}
/* Check if the communicator is a topology */
if (OMPI_COMM_IS_CART(comm) || OMPI_COMM_IS_GRAPH(comm)) {
if ( MPI_COMM_NULL != comm &&
(OMPI_COMM_IS_CART(comm) || OMPI_COMM_IS_GRAPH(comm))) {
/* check and free individual things */
@ -333,18 +338,31 @@ static void ompi_comm_destruct(ompi_communicator_t* comm)
comm->c_topo_component = NULL;
/* Tell the PML that this communicator is done.
mca_pml.pml_add_comm() was called explicitly in
ompi_comm_init() when setting up COMM_WORLD and COMM_SELF; it's
called in ompi_comm_set() for all others. This means that all
communicators must be destroyed before the PML shuts down. */
if ( MPI_COMM_NULL != comm ) {
mca_pml.pml_del_comm (comm);
}
/* Release topology information */
mca_topo_base_comm_unselect(comm);
if (NULL != comm->c_local_group) {
ompi_group_decrement_proc_count (comm->c_local_group);
OBJ_RELEASE ( comm->c_local_group );
comm->c_local_group = NULL;
if (OMPI_COMM_IS_INTRA(comm) ) {
comm->c_remote_group = NULL;
}
}
/* the reference count is always popped up for the
remote group (even for intra-comms), so we have to
decrement it again in all cases. */
if (NULL != comm->c_remote_group) {
ompi_group_decrement_proc_count (comm->c_remote_group);
OBJ_RELEASE ( comm->c_remote_group );
comm->c_remote_group = NULL;
}

Просмотреть файл

@ -79,6 +79,14 @@ ompi_group_t *ompi_group_allocate(int group_size);
*/
void ompi_group_increment_proc_count(ompi_group_t *group);
/**
* Decrement the reference count of the proc structures.
*
* @param group Pointer to ompi_group_t structute (IN)
*
*/
void ompi_group_decrement_proc_count(ompi_group_t *group);
/**
* Initialize OMPI group infrastructure.

Просмотреть файл

@ -96,6 +96,22 @@ void ompi_group_increment_proc_count(ompi_group_t *group)
return;
}
/*
 * Decrement the reference count of every proc structure
 * held by this group (inverse of ompi_group_increment_proc_count).
 */
void ompi_group_decrement_proc_count(ompi_group_t *group)
{
    int i = 0;

    /* drop one reference on each proc pointer owned by the group */
    while (i < group->grp_proc_count) {
        OBJ_RELEASE(group->grp_proc_pointers[i]);
        ++i;
    }
}
/*
* group constructor

Просмотреть файл

@ -57,8 +57,6 @@ int MPI_Comm_disconnect(MPI_Comm *comm)
(*comm)->c_coll.coll_barrier(*comm);
}
OBJ_RETAIN(*comm);
*comm = MPI_COMM_NULL;
ompi_comm_free(comm);
return MPI_SUCCESS;
}

Просмотреть файл

@ -49,6 +49,7 @@ int MPI_Group_free(MPI_Group *group)
}
l_group = (ompi_group_t *) *group;
ompi_group_decrement_proc_count (l_group);
OBJ_RELEASE(l_group);
*group = MPI_GROUP_NULL;

Просмотреть файл

@ -83,14 +83,7 @@ int ompi_mpi_finalize(void)
}
}
/* shutdown communications */
if (OMPI_SUCCESS != (ret = mca_ptl_base_close())) {
return ret;
}
if (OMPI_SUCCESS != (ret = mca_pml_base_close())) {
return ret;
}
/* Shut down any bindings-specific issues: C++, F77, F90 (may or
may not be necessary...?) */
@ -113,6 +106,16 @@ int ompi_mpi_finalize(void)
return ret;
}
/* Now that all MPI objects dealing with communications are gone,
shut down MCA types having to do with communications */
if (OMPI_SUCCESS != (ret = mca_ptl_base_close())) {
return ret;
}
if (OMPI_SUCCESS != (ret = mca_pml_base_close())) {
return ret;
}
/* Free secondary resources */
/* free attr resources */
@ -125,6 +128,11 @@ int ompi_mpi_finalize(void)
return ret;
}
/* free proc resources */
if ( OMPI_SUCCESS != (ret = ompi_proc_finalize())) {
return ret;
}
/* free internal error resources */
if (OMPI_SUCCESS != (ret = ompi_errcode_intern_finalize())) {
return ret;

Просмотреть файл

@ -99,6 +99,24 @@ int ompi_proc_init(void)
return OMPI_SUCCESS;
}
/*
 * Release all proc structures remaining on the global proc list at
 * application shutdown, then destruct the list itself.
 *
 * @return OMPI_SUCCESS (always).
 */
int ompi_proc_finalize (void)
{
    ompi_proc_t *proc, *nextproc, *endproc;

    /* Walk the list, fetching the successor BEFORE releasing the
       current element: OBJ_RELEASE may free the proc, after which it
       must not be touched.

       Note: the previous version released the first element before
       comparing it against the end sentinel, so an empty list caused
       the sentinel itself to be OBJ_RELEASEd.  Testing proc != endproc
       before any release fixes that. */
    proc    = (ompi_proc_t*)ompi_list_get_first(&ompi_proc_list);
    endproc = (ompi_proc_t*)ompi_list_get_end(&ompi_proc_list);
    while (proc != endproc) {
        nextproc = (ompi_proc_t*)ompi_list_get_next(proc);
        OBJ_RELEASE(proc);
        proc = nextproc;
    }

    OBJ_DESTRUCT(&ompi_proc_list);

    return OMPI_SUCCESS;
}
ompi_proc_t** ompi_proc_world(size_t *size)
{

Просмотреть файл

@ -48,6 +48,11 @@ OMPI_DECLSPEC extern ompi_proc_t* ompi_proc_local_proc;
*/
int ompi_proc_init(void);
/**
* Release the processes at the end of the application
*/
int ompi_proc_finalize(void);
/**
* Returns the list of proc instances associated with this job.
*/