
Added several new COMM_TYPE_<> splits

Splitting communicators based on locality, using the underlying
hardware identification, has been enabled via the
MPI_Comm_split_type function.

Currently implemented splits are:
  HWTHREAD
  CORE
  L1CACHE
  L2CACHE
  L3CACHE
  SOCKET
  NUMA
  NODE
  BOARD
  HOST
  CU
  CLUSTER

However, only NODE is defined in the standard, which is why the
remaining splits are referred to using the OMPI_ prefix instead
of the standard MPI_ prefix.
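
For illustration, here is a minimal usage sketch (my own, not part of
this commit) requesting a socket-level split; the variable names and
the printf are assumptions, and it must be built against an Open MPI
that carries these extensions:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int world_rank, sock_rank, sock_size;
    MPI_Comm sock_comm;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

    /* OMPI_COMM_TYPE_SOCKET is an Open MPI extension; only
       MPI_COMM_TYPE_SHARED (== OMPI_COMM_TYPE_NODE) is in the standard. */
    MPI_Comm_split_type(MPI_COMM_WORLD, OMPI_COMM_TYPE_SOCKET,
                        0, MPI_INFO_NULL, &sock_comm);

    MPI_Comm_rank(sock_comm, &sock_rank);
    MPI_Comm_size(sock_comm, &sock_size);
    printf("world rank %d is rank %d of %d on its socket\n",
           world_rank, sock_rank, sock_size);

    MPI_Comm_free(&sock_comm);
    MPI_Finalize();
    return 0;
}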

I have tested this using --without-hwloc and --with-hwloc=<path>,
which both give the same output.

NOTE: I think something fishy is going on in the locality operators.
In my test program I couldn't get the correct split for these requests:
  NUMA, SOCKET, L3CACHE
where I expected a full communicator but only got one.
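
A minimal sketch of the kind of comparison that exposes the suspicion
above (my illustration, not the actual test program; it assumes a node
with a single NUMA domain, so the NUMA split should match the NODE
split in size, and that MPI_Init has already been called):

#include <mpi.h>

/* returns 1 if the NUMA split is smaller than the node split, which on
   a single-NUMA node would indicate the suspected problem */
static int numa_split_suspicious(void)
{
    MPI_Comm node_comm, numa_comm;
    int node_size, numa_size;

    MPI_Comm_split_type(MPI_COMM_WORLD, OMPI_COMM_TYPE_NODE,
                        0, MPI_INFO_NULL, &node_comm);
    MPI_Comm_split_type(MPI_COMM_WORLD, OMPI_COMM_TYPE_NUMA,
                        0, MPI_INFO_NULL, &numa_comm);
    MPI_Comm_size(node_comm, &node_size);
    MPI_Comm_size(numa_comm, &numa_size);
    MPI_Comm_free(&numa_comm);
    MPI_Comm_free(&node_comm);

    return numa_size < node_size;
}
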
Nick Papior Andersen, 2014-12-24 11:21:35 +00:00
parent ffbf9738a3
commit 3deda3dc82
5 changed files with 340 additions and 20 deletions

View file

@@ -692,7 +692,7 @@ ompi_comm_split_type(ompi_communicator_t *comm,
int my_rsize;
int mode;
int rsize;
int i, loc;
int i, loc, found;
int inter;
int *results=NULL, *sorted=NULL;
int *rresults=NULL, *rsorted=NULL;
@@ -711,7 +711,51 @@ ompi_comm_split_type(ompi_communicator_t *comm,
/* --------------------------------------------------------- */
/* sort according to participation and rank. Gather information from everyone */
myinfo[0] = (split_type == MPI_COMM_TYPE_SHARED) ? 1 : 0;
/* allowed splitting types:
CLUSTER
CU
HOST
BOARD
NODE
NUMA
SOCKET
L3CACHE
L2CACHE
L1CACHE
CORE
HWTHREAD
Even though HWTHREAD/CORE etc. are overkill, they are here for consistency.
They will most likely return a communicator which is equal to MPI_COMM_SELF,
unless oversubscribing.
*/
myinfo[0] = 0; // default to no type splitting (also if non-recognized split-type)
switch ( split_type ) {
case OMPI_COMM_TYPE_HWTHREAD:
myinfo[0] = 1; break;
case OMPI_COMM_TYPE_CORE:
myinfo[0] = 2; break;
case OMPI_COMM_TYPE_L1CACHE:
myinfo[0] = 3; break;
case OMPI_COMM_TYPE_L2CACHE:
myinfo[0] = 4; break;
case OMPI_COMM_TYPE_L3CACHE:
myinfo[0] = 5; break;
case OMPI_COMM_TYPE_SOCKET:
myinfo[0] = 6; break;
case OMPI_COMM_TYPE_NUMA:
myinfo[0] = 7; break;
//case MPI_COMM_TYPE_SHARED: // the standard implemented type
case OMPI_COMM_TYPE_NODE:
myinfo[0] = 8; break;
case OMPI_COMM_TYPE_BOARD:
myinfo[0] = 9; break;
case OMPI_COMM_TYPE_HOST:
myinfo[0] = 10; break;
case OMPI_COMM_TYPE_CU:
myinfo[0] = 11; break;
case OMPI_COMM_TYPE_CLUSTER:
myinfo[0] = 12; break;
}
myinfo[1] = key;
size = ompi_comm_size ( comm );
@@ -731,13 +775,65 @@ ompi_comm_split_type(ompi_communicator_t *comm,
if ( OMPI_SUCCESS != rc ) {
goto exit;
}
/* check that all processes have been called with the same value */
for ( i=0; i < size; i++) {
if ( results[2*i] != myinfo[0] ) {
rc = OMPI_ERR_BAD_PARAM;
goto exit;
}
}
/* how many are participating and on my node? */
for ( my_size = 0, i=0; i < size; i++) {
if ( results[(2*i)+0] == 1) {
if ( results[2*i] == 1 ) {
if (OPAL_PROC_ON_LOCAL_HWTHREAD(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
my_size++;
}
} else if ( results[2*i] == 2 ) {
if (OPAL_PROC_ON_LOCAL_CORE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
my_size++;
}
} else if ( results[2*i] == 3 ) {
if (OPAL_PROC_ON_LOCAL_L1CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
my_size++;
}
} else if ( results[2*i] == 4 ) {
if (OPAL_PROC_ON_LOCAL_L2CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
my_size++;
}
} else if ( results[2*i] == 5 ) {
if (OPAL_PROC_ON_LOCAL_L3CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
my_size++;
}
} else if ( results[2*i] == 6 ) {
if (OPAL_PROC_ON_LOCAL_SOCKET(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
my_size++;
}
} else if ( results[2*i] == 7 ) {
if (OPAL_PROC_ON_LOCAL_NUMA(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
my_size++;
}
} else if ( results[2*i] == 8 ) {
if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
my_size++;
}
} else if ( results[2*i] == 9 ) {
if (OPAL_PROC_ON_LOCAL_BOARD(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
my_size++;
}
} else if ( results[2*i] == 10 ) {
if (OPAL_PROC_ON_LOCAL_HOST(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
my_size++;
}
} else if ( results[2*i] == 11 ) {
if (OPAL_PROC_ON_LOCAL_CU(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
my_size++;
}
} else if ( results[2*i] == 12 ) {
if (OPAL_PROC_ON_LOCAL_CLUSTER(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
my_size++;
}
}
}
@@ -755,12 +851,62 @@ ompi_comm_split_type(ompi_communicator_t *comm,
/* ok we can now fill this info */
for( loc = 0, i = 0; i < size; i++ ) {
if ( results[(2*i)+0] == 1) {
if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
sorted[(2*loc)+0] = i; /* copy org rank */
sorted[(2*loc)+1] = results[(2*i)+1]; /* copy key */
loc++;
found = 0;
if ( results[2*i] == 1 ) {
if (OPAL_PROC_ON_LOCAL_HWTHREAD(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( results[2*i] == 2 ) {
if (OPAL_PROC_ON_LOCAL_CORE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( results[2*i] == 3 ) {
if (OPAL_PROC_ON_LOCAL_L1CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( results[2*i] == 4 ) {
if (OPAL_PROC_ON_LOCAL_L2CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( results[2*i] == 5 ) {
if (OPAL_PROC_ON_LOCAL_L3CACHE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( results[2*i] == 6 ) {
if (OPAL_PROC_ON_LOCAL_SOCKET(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( results[2*i] == 7 ) {
if (OPAL_PROC_ON_LOCAL_NUMA(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( results[2*i] == 8 ) {
if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( results[2*i] == 9 ) {
if (OPAL_PROC_ON_LOCAL_BOARD(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( results[2*i] == 10 ) {
if (OPAL_PROC_ON_LOCAL_HOST(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( results[2*i] == 11 ) {
if (OPAL_PROC_ON_LOCAL_CU(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( results[2*i] == 12 ) {
if (OPAL_PROC_ON_LOCAL_CLUSTER(ompi_group_peer_lookup(comm->c_local_group, i)->super.proc_flags)) {
found = 1;
}
}
/* we have found and occupied the index (i) */
if ( found == 1 ) {
sorted[2*loc ] = i; /* copy org rank */
sorted[2*loc+1] = results[2*i+1]; /* copy key */
loc++;
}
}
@@ -800,10 +946,54 @@ ompi_comm_split_type(ompi_communicator_t *comm,
/* how many are participating and on my node? */
for ( my_rsize = 0, i=0; i < rsize; i++) {
if ( rresults[(2*i)+0] == 1) {
if ( rresults[2*i] == 1 ) {
if (OPAL_PROC_ON_LOCAL_HWTHREAD(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
my_rsize++;
}
} else if ( rresults[2*i] == 2 ) {
if (OPAL_PROC_ON_LOCAL_CORE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
my_rsize++;
}
} else if ( rresults[2*i] == 3 ) {
if (OPAL_PROC_ON_LOCAL_L1CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
my_rsize++;
}
} else if ( rresults[2*i] == 4 ) {
if (OPAL_PROC_ON_LOCAL_L2CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
my_rsize++;
}
} else if ( rresults[2*i] == 5 ) {
if (OPAL_PROC_ON_LOCAL_L3CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
my_rsize++;
}
} else if ( rresults[2*i] == 6 ) {
if (OPAL_PROC_ON_LOCAL_SOCKET(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
my_rsize++;
}
} else if ( rresults[2*i] == 7 ) {
if (OPAL_PROC_ON_LOCAL_NUMA(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
my_rsize++;
}
} else if ( rresults[2*i] == 8 ) {
if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
my_rsize++;
}
} else if ( rresults[2*i] == 9 ) {
if (OPAL_PROC_ON_LOCAL_BOARD(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
my_rsize++;
}
} else if ( rresults[2*i] == 10 ) {
if (OPAL_PROC_ON_LOCAL_HOST(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
my_rsize++;
}
} else if ( rresults[2*i] == 11 ) {
if (OPAL_PROC_ON_LOCAL_CU(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
my_rsize++;
}
} else if ( rresults[2*i] == 12 ) {
if (OPAL_PROC_ON_LOCAL_CLUSTER(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
my_rsize++;
}
}
}
@@ -816,12 +1006,61 @@ ompi_comm_split_type(ompi_communicator_t *comm,
/* ok we can now fill this info */
for( loc = 0, i = 0; i < rsize; i++ ) {
if ( rresults[(2*i)+0] == 1) {
if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
rsorted[(2*loc)+0] = i; /* org rank */
rsorted[(2*loc)+1] = rresults[(2*i)+1]; /* key */
loc++;
found = 0;
if ( rresults[2*i] == 1 ) {
if (OPAL_PROC_ON_LOCAL_HWTHREAD(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( rresults[2*i] == 2 ) {
if (OPAL_PROC_ON_LOCAL_CORE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( rresults[2*i] == 3 ) {
if (OPAL_PROC_ON_LOCAL_L1CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( rresults[2*i] == 4 ) {
if (OPAL_PROC_ON_LOCAL_L2CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( rresults[2*i] == 5 ) {
if (OPAL_PROC_ON_LOCAL_L3CACHE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( rresults[2*i] == 6 ) {
if (OPAL_PROC_ON_LOCAL_SOCKET(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( rresults[2*i] == 7 ) {
if (OPAL_PROC_ON_LOCAL_NUMA(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( rresults[2*i] == 8 ) {
if (OPAL_PROC_ON_LOCAL_NODE(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( rresults[2*i] == 9 ) {
if (OPAL_PROC_ON_LOCAL_BOARD(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( rresults[2*i] == 10 ) {
if (OPAL_PROC_ON_LOCAL_HOST(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( rresults[2*i] == 11 ) {
if (OPAL_PROC_ON_LOCAL_CU(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
found = 1;
}
} else if ( rresults[2*i] == 12 ) {
if (OPAL_PROC_ON_LOCAL_CLUSTER(ompi_group_peer_lookup(comm->c_remote_group, i)->super.proc_flags)) {
found = 1;
}
}
if ( found == 1 ) {
rsorted[2*loc ] = i; /* org rank */
rsorted[2*loc+1] = rresults[2*i+1]; /* key */
loc++;
}
}

View file

@@ -667,8 +667,20 @@ enum {
* (see also mpif-common.h.fin).
*/
enum {
MPI_COMM_TYPE_SHARED
OMPI_COMM_TYPE_HWTHREAD,
OMPI_COMM_TYPE_CORE,
OMPI_COMM_TYPE_L1CACHE,
OMPI_COMM_TYPE_L2CACHE,
OMPI_COMM_TYPE_L3CACHE,
OMPI_COMM_TYPE_SOCKET,
OMPI_COMM_TYPE_NUMA,
OMPI_COMM_TYPE_NODE,
OMPI_COMM_TYPE_BOARD,
OMPI_COMM_TYPE_HOST,
OMPI_COMM_TYPE_CU,
OMPI_COMM_TYPE_CLUSTER
};
#define MPI_COMM_TYPE_SHARED OMPI_COMM_TYPE_NODE
/*
* MPIT Verbosity Levels

View file

@@ -354,7 +354,19 @@ $constants->{MPI_COMBINER_F90_INTEGER} = 16;
$constants->{MPI_COMBINER_RESIZED} = 17;
$constants->{MPI_COMBINER_HINDEXED_BLOCK} = 18;
$constants->{MPI_COMM_TYPE_SHARED} = 0;
$constants->{OMPI_COMM_TYPE_HWTHREAD} = 0;
$constants->{OMPI_COMM_TYPE_CORE} = 1;
$constants->{OMPI_COMM_TYPE_L1CACHE} = 2;
$constants->{OMPI_COMM_TYPE_L2CACHE} = 3;
$constants->{OMPI_COMM_TYPE_L3CACHE} = 4;
$constants->{OMPI_COMM_TYPE_SOCKET} = 5;
$constants->{OMPI_COMM_TYPE_NUMA} = 6;
$constants->{OMPI_COMM_TYPE_NODE} = 7;
$constants->{MPI_COMM_TYPE_SHARED} = 7;
$constants->{OMPI_COMM_TYPE_BOARD} = 8;
$constants->{OMPI_COMM_TYPE_HOST} = 9;
$constants->{OMPI_COMM_TYPE_CU} = 10;
$constants->{OMPI_COMM_TYPE_CLUSTER} = 11;
#----------------------------------------------------------------------------

View file

@@ -61,7 +61,19 @@ int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key,
FUNC_NAME);
}
if ( MPI_COMM_TYPE_SHARED != split_type &&
if ( MPI_COMM_TYPE_SHARED != split_type && // Same as OMPI_COMM_TYPE_NODE
OMPI_COMM_TYPE_CLUSTER != split_type &&
OMPI_COMM_TYPE_CU != split_type &&
OMPI_COMM_TYPE_HOST != split_type &&
OMPI_COMM_TYPE_BOARD != split_type &&
OMPI_COMM_TYPE_NODE != split_type && // Same as MPI_COMM_TYPE_SHARED
OMPI_COMM_TYPE_NUMA != split_type &&
OMPI_COMM_TYPE_SOCKET != split_type &&
OMPI_COMM_TYPE_L3CACHE != split_type &&
OMPI_COMM_TYPE_L2CACHE != split_type &&
OMPI_COMM_TYPE_L1CACHE != split_type &&
OMPI_COMM_TYPE_CORE != split_type &&
OMPI_COMM_TYPE_HWTHREAD != split_type &&
MPI_UNDEFINED != split_type ) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG,
FUNC_NAME);

View file

@@ -62,10 +62,53 @@ value MPI_UNDEFINED, in which case newcomm returns MPI_COMM_NULL.
.SH SPLIT TYPES
.ft R
.TP 1i
MPI_COMM_TYPE_SHARED
MPI_COMM_TYPE_SHARED|OMPI_COMM_TYPE_NODE
This type splits the communicator into subcommunicators, each of which can create a shared memory region.
.ft R
.TP 1i
OMPI_COMM_TYPE_HWTHREAD
This type splits the communicator into subcommunicators, each of which belongs to the same hardware thread.
.ft R
.TP 1i
OMPI_COMM_TYPE_CORE
This type splits the communicator into subcommunicators, each of which belongs to the same core/processing unit.
.ft R
.TP 1i
OMPI_COMM_TYPE_L1CACHE
This type splits the communicator into subcommunicators, each of which belongs to the same L1 cache.
.ft R
.TP 1i
OMPI_COMM_TYPE_L2CACHE
This type splits the communicator into subcommunicators, each of which belongs to the same L2 cache.
.ft R
.TP 1i
OMPI_COMM_TYPE_L3CACHE
This type splits the communicator into subcommunicators, each of which belongs to the same L3 cache.
.ft R
.TP 1i
OMPI_COMM_TYPE_SOCKET
This type splits the communicator into subcommunicators, each of which belongs to the same socket.
.ft R
.TP 1i
OMPI_COMM_TYPE_NUMA
This type splits the communicator into subcommunicators, each of which belongs to the same NUMA-node.
.ft R
.TP 1i
OMPI_COMM_TYPE_BOARD
This type splits the communicator into subcommunicators, each of which belongs to the same board.
.ft R
.TP 1i
OMPI_COMM_TYPE_HOST
This type splits the communicator into subcommunicators, each of which belongs to the same host.
.ft R
.TP 1i
OMPI_COMM_TYPE_CU
This type splits the communicator into subcommunicators, each of which belongs to the same computational unit.
.ft R
.TP 1i
OMPI_COMM_TYPE_CLUSTER
This type splits the communicator into subcommunicators, each of which belongs to the same cluster.
.SH NOTES
.ft R
@@ -79,6 +122,8 @@ Multiple calls to MPI_Comm_split_type can be used to overcome the requirement th
Note that keys need not be unique. It is MPI_Comm_split_type's responsibility to sort processes in ascending order according to this key, and to break ties in a consistent way. If all the keys are specified in the same way, then all the processes in a given color will have the same relative rank order as they did in their parent group. (In general, they will have different ranks.)
.sp
Essentially, making the key value zero for all processes of a given split_type means that one needn't really pay attention to the rank-order of the processes in the new communicator.
.sp
The split types denoted with the OMPI prefix instead of the MPI prefix are specific to Open MPI and are not part of the standard.
.SH ERRORS
Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument.