diff --git a/ompi/communicator/comm_cid.c b/ompi/communicator/comm_cid.c index 9e25217cb6..7eded4e9fc 100644 --- a/ompi/communicator/comm_cid.c +++ b/ompi/communicator/comm_cid.c @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2006 University of Houston. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +31,7 @@ #include "ompi/mca/coll/base/base.h" #include "orte/mca/rml/rml.h" #include "ompi/request/request.h" +#include "ompi/runtime/mpiruntime.h" #if defined(c_plusplus) || defined(__cplusplus) extern "C" { @@ -41,6 +43,8 @@ extern "C" { * and a bridge-comm (intercomm-create scenario). */ +static int cid_block_start = 28; + typedef int ompi_comm_cid_allredfct (int *inbuf, int* outbuf, int count, struct ompi_op_t *op, ompi_communicator_t *comm, @@ -104,7 +108,6 @@ static opal_mutex_t ompi_cid_lock; #endif /* OMPI_HAVE_THREAD_SUPPORT */ static opal_list_t ompi_registered_comms; - int ompi_comm_nextcid ( ompi_communicator_t* newcomm, ompi_communicator_t* comm, ompi_communicator_t* bridgecomm, @@ -112,14 +115,9 @@ int ompi_comm_nextcid ( ompi_communicator_t* newcomm, void* remote_leader, int mode, int send_first ) { - - int nextlocal_cid; - int nextcid; - int done=0; - int response=0, glresponse=0; + int nextcid, block; + int global_block_start; bool flag; - int start=ompi_mpi_communicators.lowest_free; - int i; ompi_comm_cid_allredfct* allredfnct; @@ -146,79 +144,149 @@ int ompi_comm_nextcid ( ompi_communicator_t* newcomm, break; } - - OPAL_THREAD_LOCK(&ompi_cid_lock); - ompi_comm_register_cid (comm->c_contextid); - OPAL_THREAD_UNLOCK(&ompi_cid_lock); - - while (!done) { - /** - * This is the real algorithm described in the doc - */ - + /** + * In case multi-threading is enabled, we revert to the old algorithm + * starting from cid_block_start + */ + if (MPI_THREAD_MULTIPLE == ompi_mpi_thread_provided) { + int 
nextlocal_cid; + int done=0; + int response=0, glresponse=0; + int start=ompi_mpi_communicators.lowest_free; + int i; + OPAL_THREAD_LOCK(&ompi_cid_lock); - if (comm->c_contextid != ompi_comm_lowest_cid() ) { - /* if not lowest cid, we do not continue, but sleep and try again */ - OPAL_THREAD_UNLOCK(&ompi_cid_lock); - continue; - } + ompi_comm_register_cid (comm->c_contextid); OPAL_THREAD_UNLOCK(&ompi_cid_lock); - - - for (i=start; i < mca_pml.pml_max_contextid ; i++) { - flag=ompi_pointer_array_test_and_set_item(&ompi_mpi_communicators, i, comm); - if (true == flag) { - nextlocal_cid = i; - break; - } - } - - (allredfnct)(&nextlocal_cid, &nextcid, 1, MPI_MAX, comm, bridgecomm, - local_leader, remote_leader, send_first ); - if (nextcid == nextlocal_cid) { - response = 1; /* fine with me */ - } - else { - ompi_pointer_array_set_item(&ompi_mpi_communicators, - nextlocal_cid, NULL); - - flag = ompi_pointer_array_test_and_set_item(&ompi_mpi_communicators, - nextcid, comm ); - if (true == flag) { - response = 1; /* works as well */ - } - else { - response = 0; /* nope, not acceptable */ - } - } - - (allredfnct)(&response, &glresponse, 1, MPI_MIN, comm, bridgecomm, - local_leader, remote_leader, send_first ); - if (1 == glresponse) { - done = 1; /* we are done */ - break; - } - else if ( 0 == glresponse ) { - if ( 1 == response ) { - /* we could use that, but other don't agree */ - ompi_pointer_array_set_item(&ompi_mpi_communicators, - nextcid, NULL); + + while (!done) { + /** + * This is the real algorithm described in the doc + */ + + OPAL_THREAD_LOCK(&ompi_cid_lock); + if (comm->c_contextid != ompi_comm_lowest_cid() ) { + /* if not lowest cid, we do not continue, but sleep and try again */ + OPAL_THREAD_UNLOCK(&ompi_cid_lock); + continue; + } + OPAL_THREAD_UNLOCK(&ompi_cid_lock); + + + for (i=start; i < mca_pml.pml_max_contextid ; i++) { + flag=ompi_pointer_array_test_and_set_item(&ompi_mpi_communicators, + i, comm); + if (true == flag) { + nextlocal_cid = i; + break; 
+ } + } + + (allredfnct)(&nextlocal_cid, &nextcid, 1, MPI_MAX, comm, bridgecomm, + local_leader, remote_leader, send_first ); + if (nextcid == nextlocal_cid) { + response = 1; /* fine with me */ + } + else { + ompi_pointer_array_set_item(&ompi_mpi_communicators, + nextlocal_cid, NULL); + + flag = ompi_pointer_array_test_and_set_item(&ompi_mpi_communicators, + nextcid, comm ); + if (true == flag) { + response = 1; /* works as well */ + } + else { + response = 0; /* nope, not acceptable */ + } + } + + (allredfnct)(&response, &glresponse, 1, MPI_MIN, comm, bridgecomm, + local_leader, remote_leader, send_first ); + if (1 == glresponse) { + done = 1; /* we are done */ + break; + } + else if ( 0 == glresponse ) { + if ( 1 == response ) { + /* we could use that, but other don't agree */ + ompi_pointer_array_set_item(&ompi_mpi_communicators, + nextcid, NULL); + } + start = nextcid+1; /* that's where we can start the next round */ } - start = nextcid+1; /* that's where we can start the next round */ } + + /* set the according values to the newcomm */ + newcomm->c_contextid = nextcid; + newcomm->c_f_to_c_index = newcomm->c_contextid; + ompi_pointer_array_set_item (&ompi_mpi_communicators, nextcid, newcomm); + + OPAL_THREAD_LOCK(&ompi_cid_lock); + ompi_comm_unregister_cid (comm->c_contextid); + OPAL_THREAD_UNLOCK(&ompi_cid_lock); + + return (MPI_SUCCESS); + } + + /** + * In case the communication mode is INTRA_OOB or INTRA_BRIDGE, we use the + * highest-free algorithm + */ + if ( OMPI_COMM_CID_INTRA_OOB == mode || OMPI_COMM_CID_INTRA_BRIDGE == mode) { + (allredfnct)(&cid_block_start, &global_block_start, 1, + MPI_MAX, comm, bridgecomm, + local_leader, remote_leader, send_first ); + cid_block_start = global_block_start; + nextcid = cid_block_start; + cid_block_start = cid_block_start + 1; + } + else { + flag=false; + block = 0; + if( 0 == comm->c_contextid ) { + block = OMPI_COMM_BLOCK_WORLD; + } + else { + block = OMPI_COMM_BLOCK_OTHERS; + } + + while(!flag) { + /** + * If
the communicator has IDs available then allocate one for the child + */ + if(MPI_UNDEFINED != comm->c_id_available && + MPI_UNDEFINED != comm->c_id_start_index && + block > comm->c_id_available - comm->c_id_start_index) { + nextcid = comm->c_id_available; + flag=ompi_pointer_array_test_and_set_item (&ompi_mpi_communicators, + nextcid, comm); + } + /** + * Otherwise the communicator needs to negotiate a new block of IDs + */ + else { + (allredfnct)(&cid_block_start, &global_block_start, 1, + MPI_MAX, comm, bridgecomm, + local_leader, remote_leader, send_first ); + cid_block_start = global_block_start; + comm->c_id_available = cid_block_start; + comm->c_id_start_index = cid_block_start; + cid_block_start = cid_block_start + block; + } + } + + comm->c_id_available++; } - /* set the according values to the newcomm */ newcomm->c_contextid = nextcid; newcomm->c_f_to_c_index = newcomm->c_contextid; ompi_pointer_array_set_item (&ompi_mpi_communicators, nextcid, newcomm); - OPAL_THREAD_LOCK(&ompi_cid_lock); - ompi_comm_unregister_cid (comm->c_contextid); - OPAL_THREAD_UNLOCK(&ompi_cid_lock); - return (MPI_SUCCESS); + } + /**************************************************************************/ /**************************************************************************/ /**************************************************************************/ diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index e812cb1806..f7957b1fa8 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2006 University of Houston. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -77,6 +78,8 @@ int ompi_comm_init(void) ompi_group_increment_proc_count (group); ompi_mpi_comm_world.c_contextid = 0; + ompi_mpi_comm_world.c_id_start_index = 4; + ompi_mpi_comm_world.c_id_available = 4; ompi_mpi_comm_world.c_f_to_c_index = 0; ompi_mpi_comm_world.c_my_rank = group->grp_my_rank; ompi_mpi_comm_world.c_local_group = group; @@ -108,6 +111,8 @@ int ompi_comm_init(void) ompi_mpi_comm_self.c_contextid = 1; ompi_mpi_comm_self.c_f_to_c_index = 1; + ompi_mpi_comm_self.c_id_start_index = 20; + ompi_mpi_comm_self.c_id_available = 20; ompi_mpi_comm_self.c_my_rank = group->grp_my_rank; ompi_mpi_comm_self.c_local_group = group; ompi_mpi_comm_self.c_remote_group = group; @@ -257,6 +262,8 @@ static void ompi_comm_construct(ompi_communicator_t* comm) comm->c_f_to_c_index = MPI_UNDEFINED; comm->c_name[0] = '\0'; comm->c_contextid = MPI_UNDEFINED; + comm->c_id_available = MPI_UNDEFINED; + comm->c_id_start_index = MPI_UNDEFINED; comm->c_flags = 0; comm->c_my_rank = 0; comm->c_cube_dim = 0; diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index d19f659fb7..3da5bed840 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006 University of Houston. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -83,6 +84,13 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_CID_INTRA_BRIDGE 0x00000080 #define OMPI_COMM_CID_INTRA_OOB 0x00000100 +/** + * The block of CIDs allocated for MPI_COMM_WORLD + * and other communicators + */ +#define OMPI_COMM_BLOCK_WORLD 16 +#define OMPI_COMM_BLOCK_OTHERS 8 + OMPI_DECLSPEC extern ompi_pointer_array_t ompi_mpi_communicators; struct ompi_communicator_t { @@ -95,6 +103,11 @@ struct ompi_communicator_t { uint32_t c_flags; /* flags, e.g. intercomm, topology, etc. */ + int c_id_available; /* the currently available Cid for allocation + to a child*/ + int c_id_start_index; /* the starting index of the block of cids + allocated to this communicator*/ + ompi_group_t *c_local_group; ompi_group_t *c_remote_group; @@ -344,7 +357,6 @@ struct ompi_communicator_t { int mode, int send_first); - /** * shut down the communicator infrastructure. */