First fix for the proc reference count problem; it should now work for static scenarios. I am aware of an additional bug (which I haven't resolved yet), but it should not prevent any application from finishing correctly. A fix for the dynamic scenarios will follow soon.

This commit was SVN r3679.
This commit is contained in:
parent e03b9292b5
commit a53e4ae2eb
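For readers unfamiliar with the OBJ_RETAIN/OBJ_RELEASE discipline, here is a minimal standalone sketch of the pairing this commit enforces; all types and names below are illustrative stand-ins, not OMPI code:

#include <stdlib.h>

typedef struct { int refcount; } proc_t;

static void proc_retain(proc_t *p)  { p->refcount++; }
static void proc_release(proc_t *p) { if (--p->refcount == 0) free(p); }

typedef struct { proc_t **procs; int nprocs; } group_t;

/* analogous to ompi_group_increment_proc_count(): the group takes
   shared ownership of every proc it points at */
static void group_set_procs(group_t *g, proc_t **procs, int n)
{
    g->procs  = procs;
    g->nprocs = n;
    for (int i = 0; i < n; i++) proc_retain(g->procs[i]);
}

/* analogous to ompi_group_decrement_proc_count() in the destructor:
   give the references back; a proc with no remaining owner is freed */
static void group_destruct(group_t *g)
{
    for (int i = 0; i < g->nprocs; i++) proc_release(g->procs[i]);
}

int main(void)
{
    proc_t *p = malloc(sizeof(*p));
    p->refcount = 1;              /* the global proc list owns one ref */
    group_t g;
    group_set_procs(&g, &p, 1);   /* refcount == 2 */
    group_destruct(&g);           /* refcount == 1 */
    proc_release(p);              /* refcount == 0: freed */
    return 0;
}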
@@ -261,8 +261,10 @@ ompi_process_name_t *ompi_comm_get_rport (ompi_process_name_t *port, int send_fi
         rport = &(rproc->proc_name);
     }

-    if(isnew)
+    if (isnew) {
         mca_pml.pml_add_procs(&rproc, 1);
+    }

     return rport;
 }
@@ -427,8 +429,6 @@ int ompi_comm_start_processes (char *command, char **argv, int maxprocs,
 int ompi_comm_dyn_init (void)
 {
     uint32_t jobid;
-    size_t size;
-    ompi_proc_t **myproc=NULL;
     char *envvarname=NULL, *port_name=NULL;
     char *oob_port=NULL;
     int tag, root=0, send_first=1;
@@ -438,8 +438,13 @@ int ompi_comm_dyn_init (void)
     ompi_errhandler_t *errhandler = NULL;

     /* get jobid */
-    myproc = ompi_proc_self(&size);
-    jobid = ompi_name_server.get_jobid(&(myproc[0]->proc_name));
+    /* JMS: Previous was using ompi_proc_self() here, which
+       incremented the refcount.  That would be fine, but we would
+       have to OBJ_RELEASE it as well.  The global
+       ompi_proc_local_proc seemed to have been created for exactly
+       this kind of purpose, so I took the liberty of using it. */
+    jobid = ompi_name_server.get_jobid(&(ompi_proc_local_proc->proc_name));

     /* check for appropriate env variable */
     asprintf(&envvarname, "OMPI_PARENT_PORT_%u", jobid);
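The JMS comment above describes a general owning-vs-borrowed reference trade-off. A standalone toy model (illustrative names only; proc_self() and local_proc stand in for ompi_proc_self() and ompi_proc_local_proc) of the two patterns:

#include <stdlib.h>

typedef struct { int refcount; int jobid; } proc_t;

static proc_t local_proc = { 1, 42 };   /* stands in for ompi_proc_local_proc */

static void retain(proc_t *p)  { p->refcount++; }
static void release(proc_t *p) { p->refcount--; }  /* toy: last release would free */

/* stands in for ompi_proc_self(): hands out an owning reference */
static proc_t *proc_self(void)
{
    retain(&local_proc);
    return &local_proc;
}

int main(void)
{
    /* old pattern: owning reference; forgetting release() leaks a count */
    proc_t *self = proc_self();
    int jobid = self->jobid;
    release(self);

    /* new pattern: borrowed reference; nothing to undo */
    jobid = local_proc.jobid;
    return jobid == 42 ? 0 : 1;
}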
@@ -26,6 +26,7 @@
 #include "mca/ns/base/base.h"
 #include "mpi/runtime/params.h"
 #include "communicator/communicator.h"
+#include "group/group.h"
 #include "attribute/attribute.h"

 /*
@@ -71,6 +72,7 @@ int ompi_comm_init(void)
     group->grp_proc_count = size;
     group->grp_flags |= OMPI_GROUP_INTRINSIC;
     ompi_set_group_rank(group, ompi_proc_local());
+    ompi_group_increment_proc_count (group);
     OBJ_RETAIN(group); /* bump reference count for remote reference */

     ompi_mpi_comm_world.c_contextid = 0;
@@ -299,10 +301,13 @@ static void ompi_comm_destruct(ompi_communicator_t* comm)

     /* Release the collective module */

-    mca_coll_base_comm_unselect(comm);
+    if ( MPI_COMM_NULL != comm ) {
+        mca_coll_base_comm_unselect(comm);
+    }

     /* Check if the communicator is a topology */
-    if (OMPI_COMM_IS_CART(comm) || OMPI_COMM_IS_GRAPH(comm)) {
+    if ( MPI_COMM_NULL != comm &&
+         (OMPI_COMM_IS_CART(comm) || OMPI_COMM_IS_GRAPH(comm))) {

         /* check and free individual things */

@@ -333,18 +338,31 @@ static void ompi_comm_destruct(ompi_communicator_t* comm)

         comm->c_topo_component = NULL;

+    /* Tell the PML that this communicator is done.
+       mca_pml.pml_add_comm() was called explicitly in
+       ompi_comm_init() when setting up COMM_WORLD and COMM_SELF; it's
+       called in ompi_comm_set() for all others.  This means that all
+       communicators must be destroyed before the PML shuts down. */
+    if ( MPI_COMM_NULL != comm ) {
+        mca_pml.pml_del_comm (comm);
+    }
+

     /* Release topology information */
     mca_topo_base_comm_unselect(comm);

     if (NULL != comm->c_local_group) {
+        ompi_group_decrement_proc_count (comm->c_local_group);
         OBJ_RELEASE ( comm->c_local_group );
         comm->c_local_group = NULL;
-        if (OMPI_COMM_IS_INTRA(comm) ) {
-            comm->c_remote_group = NULL;
-        }
     }

+    /* the reference count is always popped up for the
+       remote group (even for intra-comms), so we have to
+       decrement it again in all cases. */
     if (NULL != comm->c_remote_group) {
+        ompi_group_decrement_proc_count (comm->c_remote_group);
         OBJ_RELEASE ( comm->c_remote_group );
         comm->c_remote_group = NULL;
     }
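Dropping the IS_INTRA special case above is the heart of the group-refcount fix: for an intra-communicator the same group sits behind both pointers, and setup bumps the count once per role, so teardown must release once per role for the count to balance. A standalone sketch (toy types, illustrative names, not OMPI code):

#include <stdlib.h>

typedef struct { int refcount; } group_t;

static void retain(group_t *g)  { g->refcount++; }
static void release(group_t *g) { if (--g->refcount == 0) free(g); }

int main(void)
{
    group_t *g = malloc(sizeof(*g));
    g->refcount = 1;                        /* creator's reference */

    /* intra-communicator setup: one group serves both roles, and the
       count is bumped for each (cf. the OBJ_RETAIN with the "bump
       reference count for remote reference" comment above) */
    group_t *local = g;  retain(local);     /* refcount == 2 */
    group_t *remote = g; retain(remote);    /* refcount == 3 */

    /* destructor: one release per role plus the creator's release
       brings the count to zero; skipping the remote release (the old
       IS_INTRA special case) would leak the group */
    release(local);
    release(remote);
    release(g);
    return 0;
}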
@@ -79,6 +79,14 @@ ompi_group_t *ompi_group_allocate(int group_size);
  */
 void ompi_group_increment_proc_count(ompi_group_t *group);

+/**
+ * Decrement the reference count of the proc structures.
+ *
+ * @param group Pointer to ompi_group_t structure (IN)
+ *
+ */
+void ompi_group_decrement_proc_count(ompi_group_t *group);
+

 /**
  * Initialize OMPI group infrastructure.
@@ -96,6 +96,22 @@ void ompi_group_increment_proc_count(ompi_group_t *group)
     return;
 }

+/*
+ * decrement the reference count of the proc structures
+ */
+void ompi_group_decrement_proc_count(ompi_group_t *group)
+{
+    /* local variable */
+    int proc;
+
+    for (proc = 0; proc < group->grp_proc_count; proc++) {
+        OBJ_RELEASE(group->grp_proc_pointers[proc]);
+    }
+
+    /* return */
+    return;
+}
+

 /*
  * group constructor
@@ -57,8 +57,6 @@ int MPI_Comm_disconnect(MPI_Comm *comm)
         (*comm)->c_coll.coll_barrier(*comm);
     }

-    OBJ_RETAIN(*comm);
-
-    *comm = MPI_COMM_NULL;
+    ompi_comm_free(comm);
     return MPI_SUCCESS;
 }
@@ -49,6 +49,7 @@ int MPI_Group_free(MPI_Group *group)
     }

     l_group = (ompi_group_t *) *group;
+    ompi_group_decrement_proc_count (l_group);
     OBJ_RELEASE(l_group);

     *group = MPI_GROUP_NULL;
@@ -83,14 +83,7 @@ int ompi_mpi_finalize(void)
         }
     }

-    /* shutdown communications */
-    if (OMPI_SUCCESS != (ret = mca_ptl_base_close())) {
-        return ret;
-    }
-    if (OMPI_SUCCESS != (ret = mca_pml_base_close())) {
-        return ret;
-    }

     /* Shut down any bindings-specific issues: C++, F77, F90 (may or
        may not be necessary...?) */
@@ -113,6 +106,16 @@ int ompi_mpi_finalize(void)
         return ret;
     }

+    /* Now that all MPI objects dealing with communications are gone,
+       shut down MCA types having to do with communications */
+    if (OMPI_SUCCESS != (ret = mca_ptl_base_close())) {
+        return ret;
+    }
+    if (OMPI_SUCCESS != (ret = mca_pml_base_close())) {
+        return ret;
+    }
+

     /* Free secondary resources */

     /* free attr resources */
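Moving the PTL/PML close calls below the communicator cleanup enforces the invariant stated in the destructor comment earlier: every pml_add_comm()/pml_del_comm() pair must complete before the PML closes. A toy model of that ordering (standalone stand-ins, not the real MCA interfaces):

#include <assert.h>
#include <stdio.h>

static int live_comms = 0;

static void pml_add_comm(void) { live_comms++; }
static void pml_del_comm(void) { live_comms--; }
static void pml_close(void)    { assert(live_comms == 0); puts("pml closed"); }

int main(void)
{
    pml_add_comm();   /* e.g. MPI_COMM_WORLD */
    pml_add_comm();   /* e.g. MPI_COMM_SELF  */
    pml_del_comm();   /* destroy all communicators first... */
    pml_del_comm();
    pml_close();      /* ...then it is safe to close the PML */
    return 0;
}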
@@ -125,6 +128,11 @@ int ompi_mpi_finalize(void)
         return ret;
     }

+    /* free proc resources */
+    if ( OMPI_SUCCESS != (ret = ompi_proc_finalize())) {
+        return ret;
+    }
+
     /* free internal error resources */
     if (OMPI_SUCCESS != (ret = ompi_errcode_intern_finalize())) {
         return ret;
@@ -99,6 +99,24 @@ int ompi_proc_init(void)
     return OMPI_SUCCESS;
 }

+int ompi_proc_finalize (void)
+{
+    ompi_proc_t *proc, *nextproc, *endproc;
+
+    proc     = (ompi_proc_t*)ompi_list_get_first(&ompi_proc_list);
+    nextproc = (ompi_proc_t*)ompi_list_get_next(proc);
+    endproc  = (ompi_proc_t*)ompi_list_get_end(&ompi_proc_list);
+
+    OBJ_RELEASE(proc);
+    while ( nextproc != endproc ) {
+        proc = nextproc;
+        nextproc = (ompi_proc_t *)ompi_list_get_next(proc);
+        OBJ_RELEASE(proc);
+    }
+    OBJ_DESTRUCT(&ompi_proc_list);
+
+    return OMPI_SUCCESS;
+}
+
 ompi_proc_t** ompi_proc_world(size_t *size)
 {
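Note the traversal order in ompi_proc_finalize() above: the next element is fetched before the current one is released, because OBJ_RELEASE may destroy the element and its list linkage. The same pattern with toy types, standalone:

#include <stdlib.h>

typedef struct node { struct node *next; } node_t;

/* Free every element of a singly linked list. The next pointer must be
   saved before the current node is destroyed -- the same reason
   ompi_proc_finalize() calls ompi_list_get_next() before OBJ_RELEASE():
   once released, the node may no longer be read. */
static void free_list(node_t *head)
{
    node_t *cur = head;
    while (cur != NULL) {
        node_t *next = cur->next;  /* save before freeing */
        free(cur);
        cur = next;
    }
}

int main(void)
{
    node_t *a = malloc(sizeof(*a));
    node_t *b = malloc(sizeof(*b));
    a->next = b;
    b->next = NULL;
    free_list(a);
    return 0;
}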
@@ -48,6 +48,11 @@ OMPI_DECLSPEC extern ompi_proc_t* ompi_proc_local_proc;
  */
 int ompi_proc_init(void);

+/**
+ * Release the processes at the end of the application
+ */
+int ompi_proc_finalize(void);
+
 /**
  * Returns the list of proc instances associated with this job.
  */