1
1

add a uint8_t to the startup modex which allows us to recognize whether

different processes have requested different levels of thread support. This
verification is restricted to MPI_COMM_WORLD.

In case one or more processes have requested support for MPI_THREAD_MULTIPLE,
the cid selection algorithm will fall back to the original, thread-safe
approach. Otherwise, it uses the block algorithm.

For dynamic communicators, we now always fall back to the original algorithm.
This has been tested for homogeneous and heterogeneous settings for
MCW. However, I have not yet been able to test the dynamic communicator
scenario for technical reasons, which is why I am not closing ticket 1949 yet.

This commit was SVN r21613.
Этот коммит содержится в:
Edgar Gabriel 2009-07-07 18:32:14 +00:00
родитель 311e27b42f
Коммит b6f292f794
5 изменённых файлов: 103 добавлений и 5 удалений

Просмотреть файл

@ -25,6 +25,7 @@
#include <stdlib.h>
#include "opal/dss/dss.h"
#include "orte/types.h"
#include "ompi/proc/proc.h"
#include "ompi/communicator/communicator.h"
#include "ompi/op/op.h"
#include "ompi/constants.h"
@ -33,6 +34,7 @@
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/request/request.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/mca/dpm/dpm.h"
@ -114,6 +116,43 @@ OBJ_CLASS_INSTANCE (ompi_comm_reg_t,
static opal_mutex_t ompi_cid_lock;
static opal_list_t ompi_registered_comms;
/* Flag describing the thread support requested within MPI_COMM_WORLD:
 * it is zero (false) as long as no process is known to have requested
 * MPI_THREAD_MULTIPLE support, and 1 (true) as soon as at least one
 * process requested support for THREAD_MULTIPLE.
 * It is set by ompi_comm_cid_init() and consulted by ompi_comm_nextcid()
 * when choosing the cid allocation algorithm. */
static int ompi_comm_world_thread_level_mult=0;
/**
 * Determine whether any process of MPI_COMM_WORLD published the
 * MPI_THREAD_MULTIPLE bit in its "MPI_THREAD_LEVEL" modex entry.
 *
 * As soon as one such process is found,
 * ompi_comm_world_thread_level_mult is set to 1 and the scan stops;
 * otherwise the flag is left untouched.
 *
 * @return OMPI_SUCCESS if all inspected entries could be read,
 *         OMPI_ERROR if the process list is unavailable or a modex
 *         entry could not be received or is empty.
 */
int ompi_comm_cid_init (void)
{
    ompi_proc_t **procs;
    void *tlpointer;
    uint8_t thread_level;
    size_t size, numprocs, i;
    int rc = OMPI_SUCCESS;

    /** Note that the following call only returns processes
     * with the same jobid. This is on purpose, since
     * we switch for the dynamic communicators anyway
     * to the original (slower) cid allocation algorithm.
     */
    procs = ompi_proc_world ( &numprocs );
    if ( NULL == procs ) {
        return OMPI_ERROR;
    }

    /* i is a size_t so that the comparison against numprocs is not a
     * mixed signed/unsigned comparison. */
    for ( i = 0; i < numprocs; i++ ) {
        tlpointer = NULL;
        size = 0;

        if ( OMPI_SUCCESS != ompi_modex_recv_string("MPI_THREAD_LEVEL", procs[i], &tlpointer, &size) ) {
            rc = OMPI_ERROR;
            break;
        }

        /* Never dereference a missing or truncated modex entry. */
        if ( NULL == tlpointer || size < sizeof(uint8_t) ) {
            rc = OMPI_ERROR;
            break;
        }

        /* NOTE(review): ownership of the buffer returned through
         * tlpointer is not visible here; if the modex hands out a heap
         * copy, it has to be released after this read -- confirm. */
        thread_level = *((uint8_t *) tlpointer);
        if ( OMPI_THREADLEVEL_IS_MULTIPLE (thread_level) ) {
            ompi_comm_world_thread_level_mult = 1;
            break;
        }
    }

    /* Per the ompi_proc_world() contract the caller owns the returned
     * array (but not the procs it points to), so release it on every
     * path out of the loop. */
    free ( procs );

    return rc;
}
int ompi_comm_nextcid ( ompi_communicator_t* newcomm,
ompi_communicator_t* comm,
@ -153,16 +192,17 @@ int ompi_comm_nextcid ( ompi_communicator_t* newcomm,
}
/**
* In case multi-threading is enabled, we revert to the old algorithm
* In case multi-threading is enabled by at least one process, or in
* case of dynamic communicators, we revert to the old algorithm
* starting from cid_block_start
*/
if (MPI_THREAD_MULTIPLE == ompi_mpi_thread_provided) {
if ( ompi_comm_world_thread_level_mult || OMPI_COMM_IS_DYNAMIC (newcomm) ) {
int nextlocal_cid;
int done=0;
int response, glresponse=0;
int start;
unsigned int i;
do {
/* Only one communicator function allowed in same time on the
* same communicator.

Просмотреть файл

@ -58,7 +58,6 @@ OBJ_CLASS_INSTANCE(ompi_communicator_t,opal_object_t,ompi_comm_construct,ompi_co
shortcut for finalize and abort. */
int ompi_comm_num_dyncomm=0;
/*
* Initialize comm world/self/null/parent.
*/

Просмотреть файл

@ -510,6 +510,13 @@ extern int ompi_comm_num_dyncomm;
OMPI_DECLSPEC void ompi_comm_checkfor_blockreset ( ompi_communicator_t *comm );
/* Check whether any of the processes has requested support for
   MPI_THREAD_MULTIPLE. If so, none of the advanced cid allocation
   algorithms can be used.
*/
OMPI_DECLSPEC int ompi_comm_cid_init ( void );
END_C_DECLS
#endif /* OMPI_COMMUNICATOR_H */

Просмотреть файл

@ -13,6 +13,7 @@
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -57,6 +58,28 @@ OMPI_DECLSPEC extern int ompi_mpi_thread_provided;
/** Identifier of the main thread */
OMPI_DECLSPEC extern struct opal_thread_t *ompi_mpi_main_thread;
/** Bitflags to be used for the modex exchange for the various thread
 * levels. Required to support heterogeneous environments */
#define OMPI_THREADLEVEL_SINGLE_BF 0x00000001
#define OMPI_THREADLEVEL_FUNNELED_BF 0x00000002
#define OMPI_THREADLEVEL_SERIALIZED_BF 0x00000004
#define OMPI_THREADLEVEL_MULTIPLE_BF 0x00000008
/** OR the bitflag that corresponds to the MPI thread level
 * threadlevelin into the lvalue threadlevelout. A value that matches
 * none of the four MPI_THREAD_* levels leaves threadlevelout unchanged.
 * The body is wrapped in do/while(0) so that the macro behaves like a
 * single statement (e.g. inside an unbraced if/else), and the input
 * expression is evaluated exactly once. */
#define OMPI_THREADLEVEL_SET_BITFLAG(threadlevelin,threadlevelout) do { \
    const int ompi_threadlevel_in_ = (threadlevelin); \
    if ( MPI_THREAD_SINGLE == ompi_threadlevel_in_ ) { \
        (threadlevelout) |= OMPI_THREADLEVEL_SINGLE_BF; \
    } else if ( MPI_THREAD_FUNNELED == ompi_threadlevel_in_ ) { \
        (threadlevelout) |= OMPI_THREADLEVEL_FUNNELED_BF; \
    } else if ( MPI_THREAD_SERIALIZED == ompi_threadlevel_in_ ) { \
        (threadlevelout) |= OMPI_THREADLEVEL_SERIALIZED_BF; \
    } else if ( MPI_THREAD_MULTIPLE == ompi_threadlevel_in_ ) { \
        (threadlevelout) |= OMPI_THREADLEVEL_MULTIPLE_BF; \
    } \
} while (0)
/** Non-zero if the bitflag value threadlevel has the MULTIPLE bit set.
 * The argument is parenthesized so that compound expressions such as
 * (a | b) are tested as a whole. */
#define OMPI_THREADLEVEL_IS_MULTIPLE(threadlevel) ((threadlevel) & OMPI_THREADLEVEL_MULTIPLE_BF)
/** Do we want to be warned on fork or not? */
OMPI_DECLSPEC extern bool ompi_warn_on_fork;

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006 University of Houston. All rights reserved.
* Copyright (c) 2006-2009 University of Houston. All rights reserved.
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
*
* $COPYRIGHT$
@ -64,6 +64,7 @@
#include "ompi/mpi/f77/constants.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/runtime/params.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/communicator/communicator.h"
#include "ompi/info/info.h"
#include "ompi/errhandler/errcode.h"
@ -292,6 +293,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
bool orte_setup = false;
bool paffinity_enabled = false;
/* bitflag of the thread level support provided. To be used
* for the modex in order to work in heterogeneous environments. */
uint8_t threadlevel_bf;
/* Setup enough to check get/set MCA params */
if (ORTE_SUCCESS != (ret = opal_init_util())) {
@ -383,6 +388,17 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
ompi_mpi_thread_multiple = (ompi_mpi_thread_provided ==
MPI_THREAD_MULTIPLE);
/* determine the bitflag belonging to the threadlevel_support provided */
memset ( &threadlevel_bf, 0, sizeof(uint8_t));
OMPI_THREADLEVEL_SET_BITFLAG ( ompi_mpi_thread_provided, threadlevel_bf );
/* add this bitflag to the modex */
if ( OMPI_SUCCESS != (ret = ompi_modex_send_string("MPI_THREAD_LEVEL", &threadlevel_bf, sizeof(uint8_t)))) {
error = "ompi_mpi_init: modex send thread level";
goto error;
}
/* Once we've joined the RTE, see if any MCA parameters were
passed to the MPI level */
@ -795,6 +811,19 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
goto error;
}
/* Determine the overall threadlevel support of all processes
in MPI_COMM_WORLD. This has to be done before calling
coll_base_comm_select, since some of the collective components
e.g. hierarch, might create subcommunicators. The threadlevel
requested by all processes is required in order to know
which cid allocation algorithm can be used. */
if ( OMPI_SUCCESS !=
( ret = ompi_comm_cid_init ())) {
error = "ompi_mpi_init: ompi_comm_cid_init failed";
goto error;
}
/* Init coll for the comms. This has to be after dpm_base_select,
(since dpm.mark_dyncomm is not set in the communicator creation
function else), but before dpm.dyncom_init, since this function