added LibNBC (http://www.unixer.de/NBC) as collv1 (blocking) component.
I know it does not make much sense but one can play around with the performance. Numbers are available at http://www.unixer.de/research/nbcoll/perf/. This is the first step towards collv2. Next step includes the addition of non-blocking functions to the MPI-Layer and the collv1 interface. It implements all MPI-1 collective algorithms in a non-blocking manner. However, the collv1 interface does not allow non-blocking collectives so that all collectives are used blocking by the ompi-glue layer. I wanted to add LibNBC as a separate subdirectory, but I could not convince the buildsystem (and had not the time). So the component looks pretty messy. It would be great if somebody could explain me how to move all nbc*{c,h}, and {hb,dict}*{c,h} to a seperate subdirectory. It's .ompi_ignored because I did not test it exhaustively yet. This commit was SVN r11401.
Этот коммит содержится в:
родитель
3f0a7cad9e
Коммит
6b22641669
0
ompi/mca/coll/libnbc/.ompi_ignore
Обычный файл
0
ompi/mca/coll/libnbc/.ompi_ignore
Обычный файл
7
ompi/mca/coll/libnbc/COPYRIGHT
Обычный файл
7
ompi/mca/coll/libnbc/COPYRIGHT
Обычный файл
@ -0,0 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2006 The Technical University of Chemnitz. All
|
||||
* rights reserved.
|
||||
*/
|
40
ompi/mca/coll/libnbc/LICENSE
Обычный файл
40
ompi/mca/coll/libnbc/LICENSE
Обычный файл
@ -0,0 +1,40 @@
|
||||
Copyright (c) 2006 The Trustees of Indiana University and Indiana
|
||||
University Research and Technology Corporation. All
|
||||
rights reserved.
|
||||
Copyright (c) 2006 The Technical University of Chemnitz. All rights
|
||||
reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer listed
|
||||
in this license in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
|
||||
- Neither the name of the copyright holders nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
The copyright holders provide no reassurances that the source code
|
||||
provided does not infringe any patent, copyright, or any other
|
||||
intellectual property rights of third parties. The copyright holders
|
||||
disclaim any liability to any recipient for claims brought against
|
||||
recipient by any third party for infringement of that parties
|
||||
intellectual property rights.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
1416
ompi/mca/coll/libnbc/Makefile
Обычный файл
1416
ompi/mca/coll/libnbc/Makefile
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
68
ompi/mca/coll/libnbc/Makefile.am
Обычный файл
68
ompi/mca/coll/libnbc/Makefile.am
Обычный файл
@ -0,0 +1,68 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
sources = \
|
||||
dict.c \
|
||||
hb_tree.c \
|
||||
nbc.c \
|
||||
nbc_iallgather.c \
|
||||
nbc_iallgatherv.c \
|
||||
nbc_iallreduce.c \
|
||||
nbc_ialltoall.c \
|
||||
nbc_ialltoallv.c \
|
||||
nbc_ibarrier.c \
|
||||
nbc_ibcast.c \
|
||||
nbc_igather.c \
|
||||
nbc_igatherv.c \
|
||||
nbc_ireduce.c \
|
||||
nbc_ireduce_scatter.c \
|
||||
nbc_iscan.c \
|
||||
nbc_iscatter.c \
|
||||
nbc_iscatterv.c \
|
||||
nbc_op.c \
|
||||
ompi_component.c \
|
||||
ompi_component.h \
|
||||
dict.h \
|
||||
dict_private.h \
|
||||
hb_tree.h \
|
||||
nbc.h
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if OMPI_BUILD_coll_libnbc_DSO
|
||||
component_noinst =
|
||||
component_install = mca_coll_libnbc.la
|
||||
else
|
||||
component_noinst = libmca_coll_libnbc.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(libdir)/openmpi
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_coll_libnbc_la_SOURCES = $(sources)
|
||||
mca_coll_libnbc_la_LDFLAGS = -module -avoid-version
|
||||
mca_coll_libnbc_la_LIBADD = \
|
||||
$(top_ompi_builddir)/ompi/libmpi.la \
|
||||
$(top_ompi_builddir)/orte/liborte.la \
|
||||
$(top_ompi_builddir)/opal/libopal.la
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_coll_libnbc_la_SOURCES =$(sources)
|
||||
libmca_coll_libnbc_la_LDFLAGS = -module -avoid-version
|
1416
ompi/mca/coll/libnbc/Makefile.in
Обычный файл
1416
ompi/mca/coll/libnbc/Makefile.in
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
17
ompi/mca/coll/libnbc/README
Обычный файл
17
ompi/mca/coll/libnbc/README
Обычный файл
@ -0,0 +1,17 @@
|
||||
* TODO:
|
||||
- support MPI-2 collectives
|
||||
- support MPI-2 Features (MPI_IN_PLACE)
|
||||
- support MPI-2 Requests (really? -> I don't think so :)
|
||||
|
||||
* Missing for MPI-1:
|
||||
- FORTRAN Bindings
|
||||
- add user defined operations (coll9, coll10, coll11, longuser)
|
||||
-- how do we ensure that we do not collide with Intrinsic Operations if
|
||||
we issue NBC_Ops???
|
||||
-- we cannot issue NBC_Ops ... we need to issue MPI_Ops :-(.
|
||||
-- hmm, we could simply wrap it and save the user defined op in a
|
||||
list (hash) and search this every time we get called
|
||||
--> cool idea, let's do that ...
|
||||
|
||||
* No Idea:
|
||||
- what is wrong with nbcoll (does not work with Open MPI blocking colls)
|
12
ompi/mca/coll/libnbc/README.dist
Обычный файл
12
ompi/mca/coll/libnbc/README.dist
Обычный файл
@ -0,0 +1,12 @@
|
||||
Building LibNBC:
|
||||
- set the right MPI-c Compilerwrapper in the Makefile (default: mpicc)
|
||||
- build libnbc.a with 'make'
|
||||
|
||||
Linking Programs with LibNBC:
|
||||
- import nbc.h
|
||||
- link with -lnbc
|
||||
|
||||
Further Information and examples:
|
||||
- http://www.unixer.de/NBC
|
||||
|
||||
SVN Version: r60
|
12
ompi/mca/coll/libnbc/c2f.m4
Обычный файл
12
ompi/mca/coll/libnbc/c2f.m4
Обычный файл
@ -0,0 +1,12 @@
|
||||
dnl/*
|
||||
dnl * Copyright (c) 2006 The Trustees of Indiana University and Indiana
|
||||
dnl * University Research and Technology
|
||||
dnl * Corporation. All rights reserved.
|
||||
dnl * Copyright (c) 2006 The Technical University of Chemnitz. All
|
||||
dnl * rights reserved.
|
||||
dnl */
|
||||
dnl
|
||||
define(MPI_Comm, int)dnl
|
||||
define(MPI_Op, int)dnl
|
||||
define(MPI_Datatype, int)dnl
|
||||
define(MPI_Request, int)dnl
|
21
ompi/mca/coll/libnbc/configure.params
Обычный файл
21
ompi/mca/coll/libnbc/configure.params
Обычный файл
@ -0,0 +1,21 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
PARAM_INIT_FILE=ompi_component.c
|
||||
PARAM_CONFIG_FILES=Makefile
|
106
ompi/mca/coll/libnbc/dict.c
Обычный файл
106
ompi/mca/coll/libnbc/dict.c
Обычный файл
@ -0,0 +1,106 @@
|
||||
/*
|
||||
* dict.c
|
||||
*
|
||||
* Implementation of generic dictionary routines.
|
||||
* Copyright (C) 2001-2004 Farooq Mela.
|
||||
*
|
||||
* $Id: dict.c,v 1.7 2001/11/25 06:00:49 farooq Exp farooq $
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "dict.h"
|
||||
#include "dict_private.h"
|
||||
|
||||
dict_malloc_func _dict_malloc = malloc;
|
||||
dict_free_func _dict_free = free;
|
||||
|
||||
dict_malloc_func
|
||||
dict_set_malloc(dict_malloc_func func)
|
||||
{
|
||||
dict_malloc_func old = _dict_malloc;
|
||||
_dict_malloc = func ? func : malloc;
|
||||
return old;
|
||||
}
|
||||
|
||||
dict_free_func
|
||||
dict_set_free(dict_free_func func)
|
||||
{
|
||||
dict_free_func old = _dict_free;
|
||||
_dict_free = func ? func : free;
|
||||
return old;
|
||||
}
|
||||
|
||||
/*
|
||||
* In comparing, we cannot simply subtract because that might result in signed
|
||||
* overflow.
|
||||
*/
|
||||
int
|
||||
dict_int_cmp(const void *k1, const void *k2)
|
||||
{
|
||||
const int *a = k1, *b = k2;
|
||||
|
||||
return (*a < *b) ? -1 : (*a > *b) ? +1 : 0;
|
||||
}
|
||||
|
||||
int
|
||||
dict_uint_cmp(const void *k1, const void *k2)
|
||||
{
|
||||
const unsigned int *a = k1, *b = k2;
|
||||
|
||||
return (*a < *b) ? -1 : (*a > *b) ? +1 : 0;
|
||||
}
|
||||
|
||||
int
|
||||
dict_long_cmp(const void *k1, const void *k2)
|
||||
{
|
||||
const long *a = k1, *b = k2;
|
||||
|
||||
return (*a < *b) ? -1 : (*a > *b) ? +1 : 0;
|
||||
}
|
||||
|
||||
int
|
||||
dict_ulong_cmp(const void *k1, const void *k2)
|
||||
{
|
||||
const unsigned long *a = k1, *b = k2;
|
||||
|
||||
return (*a < *b) ? -1 : (*a > *b) ? +1 : 0;
|
||||
}
|
||||
|
||||
int
|
||||
dict_ptr_cmp(const void *k1, const void *k2)
|
||||
{
|
||||
return (k1 > k2) - (k1 < k2);
|
||||
}
|
||||
|
||||
int
|
||||
dict_str_cmp(const void *k1, const void *k2)
|
||||
{
|
||||
const char *a = k1, *b = k2;
|
||||
char p, q;
|
||||
|
||||
for (;;) {
|
||||
p = *a++; q = *b++;
|
||||
if (p == 0 || p != q)
|
||||
break;
|
||||
}
|
||||
return (p > q) - (p < q);
|
||||
}
|
||||
|
||||
void
|
||||
dict_destroy(dict *dct, int del)
|
||||
{
|
||||
ASSERT(dct != NULL);
|
||||
|
||||
dct->_destroy(dct->_object, del);
|
||||
FREE(dct);
|
||||
}
|
||||
|
||||
void
|
||||
dict_itor_destroy(dict_itor *itor)
|
||||
{
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
itor->_destroy(itor->_itor);
|
||||
FREE(itor);
|
||||
}
|
140
ompi/mca/coll/libnbc/dict.h
Обычный файл
140
ompi/mca/coll/libnbc/dict.h
Обычный файл
@ -0,0 +1,140 @@
|
||||
/*
|
||||
* dict.h
|
||||
*
|
||||
* Interface for generic access to dictionary library.
|
||||
* Copyright (C) 2001-2004 Farooq Mela.
|
||||
*
|
||||
* $Id: dict.h,v 1.6 2001/11/14 05:21:10 farooq Exp farooq $
|
||||
*/
|
||||
|
||||
#ifndef _DICT_H_
|
||||
#define _DICT_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#define DICT_VERSION_MAJOR 0
|
||||
#define DICT_VERSION_MINOR 2
|
||||
#define DICT_VERSION_PATCH 1
|
||||
|
||||
#ifndef __P
|
||||
# if defined(__STDC__) || defined(__cplusplus) || defined(c_plusplus) || \
|
||||
defined(_MSC_VER)
|
||||
# define __P(x) x
|
||||
# else /* !__STDC__ && !__cplusplus && !c_plusplus && !_MSC_VER */
|
||||
# define __P(x)
|
||||
# endif
|
||||
#endif /* !__P */
|
||||
|
||||
#ifndef FALSE
|
||||
#define FALSE 0
|
||||
#endif
|
||||
|
||||
#ifndef TRUE
|
||||
#define TRUE (!FALSE)
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
# define BEGIN_DECL extern "C" {
|
||||
# define END_DECL }
|
||||
#else
|
||||
# define BEGIN_DECL
|
||||
# define END_DECL
|
||||
#endif
|
||||
|
||||
BEGIN_DECL
|
||||
|
||||
typedef void *(*dict_malloc_func)(size_t);
|
||||
typedef void (*dict_free_func)(void *);
|
||||
|
||||
dict_malloc_func dict_set_malloc __P((dict_malloc_func func));
|
||||
dict_free_func dict_set_free __P((dict_free_func func));
|
||||
|
||||
typedef int (*dict_cmp_func) __P((const void *, const void *));
|
||||
typedef void (*dict_del_func) __P((void *));
|
||||
typedef int (*dict_vis_func) __P((const void *, void *));
|
||||
typedef unsigned (*dict_hsh_func) __P((const void *));
|
||||
|
||||
typedef struct dict dict;
|
||||
typedef struct dict_itor dict_itor;
|
||||
|
||||
struct dict {
|
||||
void *_object;
|
||||
int (*_insert) __P((void *obj, void *k, void *d, int ow));
|
||||
int (*_probe) __P((void *obj, void *key, void **dat));
|
||||
void *(*_search) __P((void *obj, const void *k));
|
||||
const void *(*_csearch) __P((const void *obj, const void *k));
|
||||
int (*_remove) __P((void *obj, const void *key, int del));
|
||||
void (*_walk) __P((void *obj, dict_vis_func func));
|
||||
unsigned (*_count) __P((const void *obj));
|
||||
void (*_empty) __P((void *obj, int del));
|
||||
void (*_destroy) __P((void *obj, int del));
|
||||
dict_itor *(*_inew) __P((void *obj));
|
||||
};
|
||||
|
||||
#define dict_private(dct) (dct)->_object
|
||||
#define dict_insert(dct,k,d,o) (dct)->_insert((dct)->_object, (k), (d), (o))
|
||||
#define dict_probe(dct,k,d) (dct)->_probe((dct)->_object, (k), (d))
|
||||
#define dict_search(dct,k) (dct)->_search((dct)->_object, (k))
|
||||
#define dict_csearch(dct,k) (dct)->_csearch((dct)->_object, (k))
|
||||
#define dict_remove(dct,k,del) (dct)->_remove((dct)->_object, (k), (del))
|
||||
#define dict_walk(dct,f) (dct)->_walk((dct)->_object, (f))
|
||||
#define dict_count(dct) (dct)->_count((dct)->_object)
|
||||
#define dict_empty(dct,d) (dct)->_empty((dct)->_object, (d))
|
||||
void dict_destroy __P((dict *dct, int del));
|
||||
#define dict_itor_new(dct) (dct)->_inew((dct)->_object)
|
||||
|
||||
struct dict_itor {
|
||||
void *_itor;
|
||||
int (*_valid) __P((const void *itor));
|
||||
void (*_invalid) __P((void *itor));
|
||||
int (*_next) __P((void *itor));
|
||||
int (*_prev) __P((void *itor));
|
||||
int (*_nextn) __P((void *itor, unsigned count));
|
||||
int (*_prevn) __P((void *itor, unsigned count));
|
||||
int (*_first) __P((void *itor));
|
||||
int (*_last) __P((void *itor));
|
||||
int (*_search) __P((void *itor, const void *key));
|
||||
const void *(*_key) __P((void *itor));
|
||||
void *(*_data) __P((void *itor));
|
||||
const void *(*_cdata) __P((const void *itor));
|
||||
int (*_setdata) __P((void *itor, void *dat, int del));
|
||||
int (*_remove) __P((void *itor, int del));
|
||||
int (*_compare) __P((void *itor1, void *itor2));
|
||||
void (*_destroy) __P((void *itor));
|
||||
};
|
||||
|
||||
#define dict_itor_private(i) (i)->_itor
|
||||
#define dict_itor_valid(i) (i)->_valid((i)->_itor)
|
||||
#define dict_itor_invalidate(i) (i)->_invalid((i)->_itor)
|
||||
#define dict_itor_next(i) (i)->_next((i)->_itor)
|
||||
#define dict_itor_prev(i) (i)->_prev((i)->_itor)
|
||||
#define dict_itor_nextn(i,n) (i)->_nextn((i)->_itor, (n))
|
||||
#define dict_itor_prevn(i,n) (i)->_prevn((i)->_itor, (n))
|
||||
#define dict_itor_first(i) (i)->_first((i)->_itor)
|
||||
#define dict_itor_last(i) (i)->_last((i)->_itor)
|
||||
#define dict_itor_search(i,k) (i)->_search((i)->_itor, (k))
|
||||
#define dict_itor_key(i) (i)->_key((i)->_itor)
|
||||
#define dict_itor_data(i) (i)->_data((i)->_itor)
|
||||
#define dict_itor_cdata(i) (i)->_cdata((i)->_itor)
|
||||
#define dict_itor_set_data(i,dat,d) (i)->_setdata((i)->_itor, (dat), (d))
|
||||
#define dict_itor_remove(i) (i)->_remove((i)->_itor)
|
||||
void dict_itor_destroy __P((dict_itor *itor));
|
||||
|
||||
int dict_int_cmp __P((const void *k1, const void *k2));
|
||||
int dict_uint_cmp __P((const void *k1, const void *k2));
|
||||
int dict_long_cmp __P((const void *k1, const void *k2));
|
||||
int dict_ulong_cmp __P((const void *k1, const void *k2));
|
||||
int dict_ptr_cmp __P((const void *k1, const void *k2));
|
||||
int dict_str_cmp __P((const void *k1, const void *k2));
|
||||
|
||||
END_DECL
|
||||
|
||||
/*#include "hashtable.h"*/
|
||||
#include "hb_tree.h"
|
||||
/*#include "pr_tree.h"
|
||||
#include "rb_tree.h"
|
||||
#include "sp_tree.h"
|
||||
#include "tr_tree.h"
|
||||
#include "wb_tree.h"*/
|
||||
|
||||
#endif /* !_DICT_H_ */
|
84
ompi/mca/coll/libnbc/dict_private.h
Обычный файл
84
ompi/mca/coll/libnbc/dict_private.h
Обычный файл
@ -0,0 +1,84 @@
|
||||
/*
|
||||
* dict_private.h
|
||||
*
|
||||
* Private definitions for libdict.
|
||||
* Copyright (C) 2001 Farooq Mela.
|
||||
*
|
||||
* $Id: dict_private.h,v 1.8 2002/01/02 09:14:11 farooq Exp $
|
||||
*/
|
||||
|
||||
#ifndef _DICT_PRIVATE_H_
|
||||
#define _DICT_PRIVATE_H_
|
||||
|
||||
#include "dict.h"
|
||||
|
||||
typedef int (*insert_func) __P((void *, void *k, void *d, int o));
|
||||
typedef int (*probe_func) __P((void *, void *k, void **d));
|
||||
typedef void *(*search_func) __P((void *, const void *k));
|
||||
typedef const void *(*csearch_func) __P((const void *, const void *k));
|
||||
typedef int (*remove_func) __P((void *, const void *k, int d));
|
||||
typedef void (*walk_func) __P((void *, dict_vis_func visit));
|
||||
typedef unsigned (*count_func) __P((const void *));
|
||||
typedef void (*empty_func) __P((void *, int del));
|
||||
typedef void (*destroy_func) __P((void *, int del));
|
||||
typedef dict_itor *(*inew_func) __P((void *));
|
||||
|
||||
typedef void (*idestroy_func) __P((void *));
|
||||
typedef int (*valid_func) __P((const void *));
|
||||
typedef void (*invalidate_func) __P((void *));
|
||||
typedef int (*next_func) __P((void *));
|
||||
typedef int (*prev_func) __P((void *));
|
||||
typedef int (*nextn_func) __P((void *, unsigned count));
|
||||
typedef int (*prevn_func) __P((void *, unsigned count));
|
||||
typedef int (*first_func) __P((void *));
|
||||
typedef int (*last_func) __P((void *));
|
||||
typedef int (*isearch_func) __P((void *, const void *k));
|
||||
typedef const void *(*key_func) __P((void *));
|
||||
typedef void *(*data_func) __P((void *));
|
||||
typedef const void *(*cdata_func) __P((const void *));
|
||||
typedef int (*dataset_func) __P((void *, void *d, int del));
|
||||
typedef int (*iremove_func) __P((void *, int del));
|
||||
typedef int (*icompare_func) __P((void *, void *itor2));
|
||||
|
||||
#ifndef NDEBUG
|
||||
# include <stdio.h>
|
||||
# undef ASSERT
|
||||
# if defined(__GNUC__)
|
||||
# define ASSERT(expr) \
|
||||
if (!(expr)) \
|
||||
fprintf(stderr, "\n%s:%d (%s) assertion failed: `%s'\n", \
|
||||
__FILE__, __LINE__, __PRETTY_FUNCTION__, #expr), \
|
||||
abort()
|
||||
# else
|
||||
# define ASSERT(expr) \
|
||||
if (!(expr)) \
|
||||
fprintf(stderr, "\n%s:%d assertion failed: `%s'\n", \
|
||||
__FILE__, __LINE__, #expr), \
|
||||
abort()
|
||||
# endif
|
||||
#else
|
||||
# define ASSERT(expr)
|
||||
#endif
|
||||
|
||||
extern dict_malloc_func _dict_malloc;
|
||||
extern dict_free_func _dict_free;
|
||||
#define MALLOC(n) (*_dict_malloc)(n)
|
||||
#define FREE(p) (*_dict_free)(p)
|
||||
|
||||
#define ABS(a) ((a) < 0 ? -(a) : +(a))
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
#define SWAP(a,b,v) v = (a), (a) = (b), (b) = v
|
||||
#define UNUSED(p) (void)&p
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define GCC_INLINE __inline__
|
||||
# define GCC_UNUSED __attribute__((__unused__))
|
||||
# define GCC_CONST __attribute__((__const__))
|
||||
#else
|
||||
# define GCC_INLINE
|
||||
# define GCC_UNUSED
|
||||
# define GCC_CONST
|
||||
#endif
|
||||
|
||||
#endif /* !_DICT_PRIVATE_H_ */
|
906
ompi/mca/coll/libnbc/hb_tree.c
Обычный файл
906
ompi/mca/coll/libnbc/hb_tree.c
Обычный файл
@ -0,0 +1,906 @@
|
||||
/*
|
||||
* hb_tree.c
|
||||
*
|
||||
* Implementation of height balanced tree.
|
||||
* Copyright (C) 2001-2004 Farooq Mela.
|
||||
*
|
||||
* $Id: hb_tree.c,v 1.10 2001/11/25 08:30:21 farooq Exp farooq $
|
||||
*
|
||||
* cf. [Gonnet 1984], [Knuth 1998]
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "hb_tree.h"
|
||||
#include "dict_private.h"
|
||||
|
||||
typedef signed char balance_t;
|
||||
|
||||
typedef struct hb_node hb_node;
|
||||
|
||||
struct hb_node {
|
||||
void *key;
|
||||
void *dat;
|
||||
hb_node *parent;
|
||||
hb_node *llink;
|
||||
hb_node *rlink;
|
||||
balance_t bal;
|
||||
};
|
||||
|
||||
struct hb_tree {
|
||||
hb_node *root;
|
||||
unsigned count;
|
||||
dict_cmp_func key_cmp;
|
||||
dict_del_func key_del;
|
||||
dict_del_func dat_del;
|
||||
};
|
||||
|
||||
struct hb_itor {
|
||||
hb_tree *tree;
|
||||
hb_node *node;
|
||||
};
|
||||
|
||||
static int rot_left __P((hb_tree *tree, hb_node *node));
|
||||
static int rot_right __P((hb_tree *tree, hb_node *node));
|
||||
static unsigned node_height __P((const hb_node *node));
|
||||
static unsigned node_mheight __P((const hb_node *node));
|
||||
static unsigned node_pathlen __P((const hb_node *node, unsigned level));
|
||||
static hb_node *node_new __P((void *key, void *dat));
|
||||
static hb_node *node_min __P((hb_node *node));
|
||||
static hb_node *node_max __P((hb_node *node));
|
||||
static hb_node *node_next __P((hb_node *node));
|
||||
static hb_node *node_prev __P((hb_node *node));
|
||||
|
||||
hb_tree *
|
||||
hb_tree_new(dict_cmp_func key_cmp, dict_del_func key_del,
|
||||
dict_del_func dat_del)
|
||||
{
|
||||
hb_tree *tree;
|
||||
|
||||
if ((tree = MALLOC(sizeof(*tree))) == NULL)
|
||||
return NULL;
|
||||
|
||||
tree->root = NULL;
|
||||
tree->count = 0;
|
||||
tree->key_cmp = key_cmp ? key_cmp : dict_ptr_cmp;
|
||||
tree->key_del = key_del;
|
||||
tree->dat_del = dat_del;
|
||||
|
||||
return tree;
|
||||
}
|
||||
|
||||
dict *
|
||||
hb_dict_new(dict_cmp_func key_cmp, dict_del_func key_del,
|
||||
dict_del_func dat_del)
|
||||
{
|
||||
dict *dct;
|
||||
hb_tree *tree;
|
||||
|
||||
if ((dct = MALLOC(sizeof(*dct))) == NULL)
|
||||
return NULL;
|
||||
|
||||
if ((tree = hb_tree_new(key_cmp, key_del, dat_del)) == NULL) {
|
||||
FREE(dct);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
dct->_object = tree;
|
||||
dct->_inew = (inew_func)hb_dict_itor_new;
|
||||
dct->_destroy = (destroy_func)hb_tree_destroy;
|
||||
dct->_insert = (insert_func)hb_tree_insert;
|
||||
dct->_probe = (probe_func)hb_tree_probe;
|
||||
dct->_search = (search_func)hb_tree_search;
|
||||
dct->_csearch = (csearch_func)hb_tree_csearch;
|
||||
dct->_remove = (remove_func)hb_tree_remove;
|
||||
dct->_empty = (empty_func)hb_tree_empty;
|
||||
dct->_walk = (walk_func)hb_tree_walk;
|
||||
dct->_count = (count_func)hb_tree_count;
|
||||
|
||||
return dct;
|
||||
}
|
||||
|
||||
void
|
||||
hb_tree_destroy(hb_tree *tree, int del)
|
||||
{
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
if (tree->root)
|
||||
hb_tree_empty(tree, del);
|
||||
|
||||
FREE(tree);
|
||||
}
|
||||
|
||||
void
|
||||
hb_tree_empty(hb_tree *tree, int del)
|
||||
{
|
||||
hb_node *node, *parent;
|
||||
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
node = tree->root;
|
||||
|
||||
while (node) {
|
||||
if (node->llink || node->rlink) {
|
||||
node = node->llink ? node->llink : node->rlink;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (del) {
|
||||
if (tree->key_del)
|
||||
tree->key_del(node->key);
|
||||
if (tree->dat_del)
|
||||
tree->dat_del(node->dat);
|
||||
}
|
||||
|
||||
parent = node->parent;
|
||||
FREE(node);
|
||||
|
||||
if (parent) {
|
||||
if (parent->llink == node)
|
||||
parent->llink = NULL;
|
||||
else
|
||||
parent->rlink = NULL;
|
||||
}
|
||||
node = parent;
|
||||
}
|
||||
|
||||
tree->root = NULL;
|
||||
tree->count = 0;
|
||||
}
|
||||
|
||||
void *
|
||||
hb_tree_search(hb_tree *tree, const void *key)
|
||||
{
|
||||
int rv;
|
||||
hb_node *node;
|
||||
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
node = tree->root;
|
||||
while (node) {
|
||||
rv = tree->key_cmp(key, node->key);
|
||||
if (rv < 0)
|
||||
node = node->llink;
|
||||
else if (rv > 0)
|
||||
node = node->rlink;
|
||||
else
|
||||
return node->dat;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const void *
|
||||
hb_tree_csearch(const hb_tree *tree, const void *key)
|
||||
{
|
||||
return hb_tree_csearch((hb_tree *)tree, key);
|
||||
}
|
||||
|
||||
int
|
||||
hb_tree_insert(hb_tree *tree, void *key, void *dat, int overwrite)
|
||||
{
|
||||
int rv = 0;
|
||||
hb_node *node, *parent = NULL, *q = NULL;
|
||||
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
node = tree->root;
|
||||
while (node) {
|
||||
rv = tree->key_cmp(key, node->key);
|
||||
if (rv < 0)
|
||||
parent = node, node = node->llink;
|
||||
else if (rv > 0)
|
||||
parent = node, node = node->rlink;
|
||||
else {
|
||||
if (overwrite == 0)
|
||||
return 1;
|
||||
if (tree->key_del)
|
||||
tree->key_del(node->key);
|
||||
if (tree->dat_del)
|
||||
tree->dat_del(node->dat);
|
||||
node->key = key;
|
||||
node->dat = dat;
|
||||
return 0;
|
||||
}
|
||||
if (parent->bal)
|
||||
q = parent;
|
||||
}
|
||||
|
||||
if ((node = node_new(key, dat)) == NULL)
|
||||
return -1;
|
||||
if ((node->parent = parent) == NULL) {
|
||||
tree->root = node;
|
||||
ASSERT(tree->count == 0);
|
||||
tree->count = 1;
|
||||
return 0;
|
||||
}
|
||||
if (rv < 0)
|
||||
parent->llink = node;
|
||||
else
|
||||
parent->rlink = node;
|
||||
|
||||
while (parent != q) {
|
||||
parent->bal = (parent->rlink == node) * 2 - 1;
|
||||
node = parent;
|
||||
parent = node->parent;
|
||||
}
|
||||
if (q) {
|
||||
if (q->llink == node) {
|
||||
if (--q->bal == -2) {
|
||||
if (q->llink->bal > 0)
|
||||
rot_left(tree, q->llink);
|
||||
rot_right(tree, q);
|
||||
}
|
||||
} else {
|
||||
if (++q->bal == +2) {
|
||||
if (q->rlink->bal < 0)
|
||||
rot_right(tree, q->rlink);
|
||||
rot_left(tree, q);
|
||||
}
|
||||
}
|
||||
}
|
||||
tree->count++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hb_tree_probe(hb_tree *tree, void *key, void **dat)
|
||||
{
|
||||
int rv = 0;
|
||||
hb_node *node, *parent = NULL, *q = NULL;
|
||||
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
node = tree->root;
|
||||
while (node) {
|
||||
rv = tree->key_cmp(key, node->key);
|
||||
if (rv < 0)
|
||||
parent = node, node = node->llink;
|
||||
else if (rv > 0)
|
||||
parent = node, node = node->rlink;
|
||||
else {
|
||||
*dat = node->dat;
|
||||
return 0;
|
||||
}
|
||||
if (parent->bal)
|
||||
q = parent;
|
||||
}
|
||||
|
||||
if ((node = node_new(key, *dat)) == NULL)
|
||||
return -1;
|
||||
if ((node->parent = parent) == NULL) {
|
||||
tree->root = node;
|
||||
ASSERT(tree->count == 0);
|
||||
tree->count = 1;
|
||||
return 1;
|
||||
}
|
||||
if (rv < 0)
|
||||
parent->llink = node;
|
||||
else
|
||||
parent->rlink = node;
|
||||
|
||||
while (parent != q) {
|
||||
parent->bal = (parent->rlink == node) * 2 - 1;
|
||||
node = parent;
|
||||
parent = parent->parent;
|
||||
}
|
||||
if (q) {
|
||||
if (q->llink == node) {
|
||||
if (--q->bal == -2) {
|
||||
if (q->llink->bal > 0)
|
||||
rot_left(tree, q->llink);
|
||||
rot_right(tree, q);
|
||||
}
|
||||
} else {
|
||||
if (++q->bal == +2) {
|
||||
if (q->rlink->bal < 0)
|
||||
rot_right(tree, q->rlink);
|
||||
rot_left(tree, q);
|
||||
}
|
||||
}
|
||||
}
|
||||
tree->count++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#define FREE_NODE(n) \
|
||||
if (del) { \
|
||||
if (tree->key_del) \
|
||||
tree->key_del((n)->key); \
|
||||
if (tree->dat_del) \
|
||||
tree->dat_del((n)->dat); \
|
||||
} \
|
||||
FREE(n)
|
||||
|
||||
int
|
||||
hb_tree_remove(hb_tree *tree, const void *key, int del)
|
||||
{
|
||||
int rv, left;
|
||||
hb_node *node, *out, *parent = NULL;
|
||||
void *tmp;
|
||||
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
node = tree->root;
|
||||
while (node) {
|
||||
rv = tree->key_cmp(key, node->key);
|
||||
if (rv == 0)
|
||||
break;
|
||||
parent = node;
|
||||
node = rv < 0 ? node->llink : node->rlink;
|
||||
}
|
||||
if (node == NULL)
|
||||
return -1;
|
||||
|
||||
if (node->llink && node->rlink) {
|
||||
for (out = node->rlink; out->llink; out = out->llink)
|
||||
/* void */;
|
||||
SWAP(node->key, out->key, tmp);
|
||||
SWAP(node->dat, out->dat, tmp);
|
||||
node = out;
|
||||
parent = out->parent;
|
||||
}
|
||||
|
||||
out = node->llink ? node->llink : node->rlink;
|
||||
FREE_NODE(node);
|
||||
if (out)
|
||||
out->parent = parent;
|
||||
if (parent == NULL) {
|
||||
tree->root = out;
|
||||
tree->count--;
|
||||
return 0;
|
||||
}
|
||||
|
||||
left = parent->llink == node;
|
||||
if (left)
|
||||
parent->llink = out;
|
||||
else
|
||||
parent->rlink = out;
|
||||
|
||||
for (;;) {
|
||||
if (left) {
|
||||
if (++parent->bal == 0) {
|
||||
node = parent;
|
||||
goto higher;
|
||||
}
|
||||
if (parent->bal == +2) {
|
||||
ASSERT(parent->rlink != NULL);
|
||||
if (parent->rlink->bal < 0) {
|
||||
rot_right(tree, parent->rlink);
|
||||
rot_left(tree, parent);
|
||||
} else {
|
||||
ASSERT(parent->rlink->rlink != NULL);
|
||||
if (rot_left(tree, parent) == 0)
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (--parent->bal == 0) {
|
||||
node = parent;
|
||||
goto higher;
|
||||
}
|
||||
if (parent->bal == -2) {
|
||||
ASSERT(parent->llink != NULL);
|
||||
if (parent->llink->bal > 0) {
|
||||
rot_left(tree, parent->llink);
|
||||
rot_right(tree, parent);
|
||||
} else {
|
||||
ASSERT(parent->llink->llink != NULL);
|
||||
if (rot_right(tree, parent) == 0)
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Only get here on double rotations or single rotations that changed
|
||||
* subtree height - in either event, `parent->parent' is positioned
|
||||
* where `parent' was positioned before any rotations. */
|
||||
node = parent->parent;
|
||||
higher:
|
||||
if ((parent = node->parent) == NULL)
|
||||
break;
|
||||
left = parent->llink == node;
|
||||
}
|
||||
tree->count--;
|
||||
return 0;
|
||||
}
|
||||
|
||||
const void *
|
||||
hb_tree_min(const hb_tree *tree)
|
||||
{
|
||||
const hb_node *node;
|
||||
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
if (tree->root == NULL)
|
||||
return NULL;
|
||||
|
||||
for (node = tree->root; node->llink; node = node->llink)
|
||||
/* void */;
|
||||
return node->key;
|
||||
}
|
||||
|
||||
const void *
|
||||
hb_tree_max(const hb_tree *tree)
|
||||
{
|
||||
const hb_node *node;
|
||||
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
if ((node = tree->root) == NULL)
|
||||
return NULL;
|
||||
|
||||
for (; node->rlink; node = node->rlink)
|
||||
/* void */;
|
||||
return node->key;
|
||||
}
|
||||
|
||||
void
|
||||
hb_tree_walk(hb_tree *tree, dict_vis_func visit)
|
||||
{
|
||||
hb_node *node;
|
||||
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
if (tree->root == NULL)
|
||||
return;
|
||||
for (node = node_min(tree->root); node; node = node_next(node))
|
||||
if (visit(node->key, node->dat) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned
|
||||
hb_tree_count(const hb_tree *tree)
|
||||
{
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
return tree->count;
|
||||
}
|
||||
|
||||
unsigned
|
||||
hb_tree_height(const hb_tree *tree)
|
||||
{
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
return tree->root ? node_height(tree->root) : 0;
|
||||
}
|
||||
|
||||
unsigned
|
||||
hb_tree_mheight(const hb_tree *tree)
|
||||
{
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
return tree->root ? node_mheight(tree->root) : 0;
|
||||
}
|
||||
|
||||
unsigned
|
||||
hb_tree_pathlen(const hb_tree *tree)
|
||||
{
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
return tree->root ? node_pathlen(tree->root, 1) : 0;
|
||||
}
|
||||
|
||||
static hb_node *
|
||||
node_new(void *key, void *dat)
|
||||
{
|
||||
hb_node *node;
|
||||
|
||||
if ((node = MALLOC(sizeof(*node))) == NULL)
|
||||
return NULL;
|
||||
|
||||
node->key = key;
|
||||
node->dat = dat;
|
||||
node->parent = NULL;
|
||||
node->llink = NULL;
|
||||
node->rlink = NULL;
|
||||
node->bal = 0;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
static hb_node *
|
||||
node_min(hb_node *node)
|
||||
{
|
||||
ASSERT(node != NULL);
|
||||
|
||||
while (node->llink)
|
||||
node = node->llink;
|
||||
return node;
|
||||
}
|
||||
|
||||
static hb_node *
|
||||
node_max(hb_node *node)
|
||||
{
|
||||
ASSERT(node != NULL);
|
||||
|
||||
while (node->rlink)
|
||||
node = node->rlink;
|
||||
return node;
|
||||
}
|
||||
|
||||
static hb_node *
|
||||
node_next(hb_node *node)
|
||||
{
|
||||
hb_node *temp;
|
||||
|
||||
ASSERT(node != NULL);
|
||||
|
||||
if (node->rlink) {
|
||||
for (node = node->rlink; node->llink; node = node->llink)
|
||||
/* void */;
|
||||
return node;
|
||||
}
|
||||
temp = node->parent;
|
||||
while (temp && temp->rlink == node) {
|
||||
node = temp;
|
||||
temp = temp->parent;
|
||||
}
|
||||
return temp;
|
||||
}
|
||||
|
||||
static hb_node *
|
||||
node_prev(hb_node *node)
|
||||
{
|
||||
hb_node *temp;
|
||||
|
||||
ASSERT(node != NULL);
|
||||
|
||||
if (node->llink) {
|
||||
for (node = node->llink; node->rlink; node = node->rlink)
|
||||
/* void */;
|
||||
return node;
|
||||
}
|
||||
temp = node->parent;
|
||||
while (temp && temp->llink == node) {
|
||||
node = temp;
|
||||
temp = temp->parent;
|
||||
}
|
||||
return temp;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
node_height(const hb_node *node)
|
||||
{
|
||||
unsigned l, r;
|
||||
|
||||
ASSERT(node != NULL);
|
||||
|
||||
l = node->llink ? node_height(node->llink) + 1 : 0;
|
||||
r = node->rlink ? node_height(node->rlink) + 1 : 0;
|
||||
return MAX(l, r);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
node_mheight(const hb_node *node)
|
||||
{
|
||||
unsigned l, r;
|
||||
|
||||
ASSERT(node != NULL);
|
||||
|
||||
l = node->llink ? node_mheight(node->llink) + 1 : 0;
|
||||
r = node->rlink ? node_mheight(node->rlink) + 1 : 0;
|
||||
return MIN(l, r);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
node_pathlen(const hb_node *node, unsigned level)
|
||||
{
|
||||
unsigned n = 0;
|
||||
|
||||
ASSERT(node != NULL);
|
||||
|
||||
if (node->llink)
|
||||
n += level + node_pathlen(node->llink, level + 1);
|
||||
if (node->rlink)
|
||||
n += level + node_pathlen(node->rlink, level + 1);
|
||||
return n;
|
||||
}
|
||||
|
||||
/*
|
||||
* rot_left(T, B):
|
||||
*
|
||||
* / /
|
||||
* B D
|
||||
* / \ / \
|
||||
* A D ==> B E
|
||||
* / \ / \
|
||||
* C E A C
|
||||
*
|
||||
*/
|
||||
static int
|
||||
rot_left(hb_tree *tree, hb_node *node)
|
||||
{
|
||||
int hc;
|
||||
hb_node *rlink, *parent;
|
||||
|
||||
ASSERT(tree != NULL);
|
||||
ASSERT(node != NULL);
|
||||
ASSERT(node->rlink != NULL);
|
||||
|
||||
rlink = node->rlink;
|
||||
node->rlink = rlink->llink;
|
||||
if (rlink->llink)
|
||||
rlink->llink->parent = node;
|
||||
parent = node->parent;
|
||||
rlink->parent = parent;
|
||||
if (parent) {
|
||||
if (parent->llink == node)
|
||||
parent->llink = rlink;
|
||||
else
|
||||
parent->rlink = rlink;
|
||||
} else {
|
||||
tree->root = rlink;
|
||||
}
|
||||
rlink->llink = node;
|
||||
node->parent = rlink;
|
||||
|
||||
hc = rlink->bal != 0;
|
||||
node->bal -= 1 + MAX(rlink->bal, 0);
|
||||
rlink->bal -= 1 - MIN(node->bal, 0);
|
||||
return hc;
|
||||
}
|
||||
|
||||
/*
|
||||
* rot_right(T, D):
|
||||
*
|
||||
* / /
|
||||
* D B
|
||||
* / \ / \
|
||||
* B E ==> A D
|
||||
* / \ / \
|
||||
* A C C E
|
||||
*
|
||||
*/
|
||||
static int
|
||||
rot_right(hb_tree *tree, hb_node *node)
|
||||
{
|
||||
int hc;
|
||||
hb_node *llink, *parent;
|
||||
|
||||
ASSERT(tree != NULL);
|
||||
ASSERT(node != NULL);
|
||||
ASSERT(node->llink != NULL);
|
||||
|
||||
llink = node->llink;
|
||||
node->llink = llink->rlink;
|
||||
if (llink->rlink)
|
||||
llink->rlink->parent = node;
|
||||
parent = node->parent;
|
||||
llink->parent = parent;
|
||||
if (parent) {
|
||||
if (parent->llink == node)
|
||||
parent->llink = llink;
|
||||
else
|
||||
parent->rlink = llink;
|
||||
} else {
|
||||
tree->root = llink;
|
||||
}
|
||||
llink->rlink = node;
|
||||
node->parent = llink;
|
||||
|
||||
hc = llink->bal != 0;
|
||||
node->bal += 1 - MIN(llink->bal, 0);
|
||||
llink->bal += 1 + MAX(node->bal, 0);
|
||||
return hc;
|
||||
}
|
||||
|
||||
hb_itor *
|
||||
hb_itor_new(hb_tree *tree)
|
||||
{
|
||||
hb_itor *itor;
|
||||
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
if ((itor = MALLOC(sizeof(*itor))) == NULL)
|
||||
return NULL;
|
||||
|
||||
itor->tree = tree;
|
||||
hb_itor_first(itor);
|
||||
return itor;
|
||||
}
|
||||
|
||||
dict_itor *
|
||||
hb_dict_itor_new(hb_tree *tree)
|
||||
{
|
||||
dict_itor *itor;
|
||||
|
||||
ASSERT(tree != NULL);
|
||||
|
||||
if ((itor = MALLOC(sizeof(*itor))) == NULL)
|
||||
return NULL;
|
||||
|
||||
if ((itor->_itor = hb_itor_new(tree)) == NULL) {
|
||||
FREE(itor);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
itor->_destroy = (idestroy_func)hb_itor_destroy;
|
||||
itor->_valid = (valid_func)hb_itor_valid;
|
||||
itor->_invalid = (invalidate_func)hb_itor_invalidate;
|
||||
itor->_next = (next_func)hb_itor_next;
|
||||
itor->_prev = (prev_func)hb_itor_prev;
|
||||
itor->_nextn = (nextn_func)hb_itor_nextn;
|
||||
itor->_prevn = (prevn_func)hb_itor_prevn;
|
||||
itor->_first = (first_func)hb_itor_first;
|
||||
itor->_last = (last_func)hb_itor_last;
|
||||
itor->_search = (isearch_func)hb_itor_search;
|
||||
itor->_key = (key_func)hb_itor_key;
|
||||
itor->_data = (data_func)hb_itor_data;
|
||||
itor->_cdata = (cdata_func)hb_itor_cdata;
|
||||
itor->_setdata = (dataset_func)hb_itor_set_data;
|
||||
|
||||
return itor;
|
||||
}
|
||||
|
||||
void
|
||||
hb_itor_destroy(hb_itor *itor)
|
||||
{
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
FREE(itor);
|
||||
}
|
||||
|
||||
#define RETVALID(itor) return itor->node != NULL
|
||||
|
||||
int
|
||||
hb_itor_valid(const hb_itor *itor)
|
||||
{
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
RETVALID(itor);
|
||||
}
|
||||
|
||||
void
|
||||
hb_itor_invalidate(hb_itor *itor)
|
||||
{
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
itor->node = NULL;
|
||||
}
|
||||
|
||||
int
|
||||
hb_itor_next(hb_itor *itor)
|
||||
{
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
if (itor->node == NULL)
|
||||
hb_itor_first(itor);
|
||||
else
|
||||
itor->node = node_next(itor->node);
|
||||
RETVALID(itor);
|
||||
}
|
||||
|
||||
int
|
||||
hb_itor_prev(hb_itor *itor)
|
||||
{
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
if (itor->node == NULL)
|
||||
hb_itor_last(itor);
|
||||
else
|
||||
itor->node = node_prev(itor->node);
|
||||
RETVALID(itor);
|
||||
}
|
||||
|
||||
int
|
||||
hb_itor_nextn(hb_itor *itor, unsigned count)
|
||||
{
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
if (count) {
|
||||
if (itor->node == NULL) {
|
||||
hb_itor_first(itor);
|
||||
count--;
|
||||
}
|
||||
|
||||
while (count-- && itor->node)
|
||||
itor->node = node_next(itor->node);
|
||||
}
|
||||
|
||||
RETVALID(itor);
|
||||
}
|
||||
|
||||
int
|
||||
hb_itor_prevn(hb_itor *itor, unsigned count)
|
||||
{
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
if (count) {
|
||||
if (itor->node == NULL) {
|
||||
hb_itor_last(itor);
|
||||
count--;
|
||||
}
|
||||
|
||||
while (count-- && itor->node)
|
||||
itor->node = node_prev(itor->node);
|
||||
}
|
||||
|
||||
RETVALID(itor);
|
||||
}
|
||||
|
||||
int
|
||||
hb_itor_first(hb_itor *itor)
|
||||
{
|
||||
hb_tree *t;
|
||||
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
t = itor->tree;
|
||||
itor->node = t->root ? node_min(t->root) : NULL;
|
||||
RETVALID(itor);
|
||||
}
|
||||
|
||||
int
|
||||
hb_itor_last(hb_itor *itor)
|
||||
{
|
||||
hb_tree *t;
|
||||
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
t = itor->tree;
|
||||
itor->node = t->root ? node_max(t->root) : NULL;
|
||||
RETVALID(itor);
|
||||
}
|
||||
|
||||
int
|
||||
hb_itor_search(hb_itor *itor, const void *key)
|
||||
{
|
||||
int rv;
|
||||
hb_node *node;
|
||||
dict_cmp_func cmp;
|
||||
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
cmp = itor->tree->key_cmp;
|
||||
for (node = itor->tree->root; node;) {
|
||||
rv = cmp(key, node->key);
|
||||
if (rv == 0)
|
||||
break;
|
||||
node = rv < 0 ? node->llink : node->rlink;
|
||||
}
|
||||
itor->node = node;
|
||||
RETVALID(itor);
|
||||
}
|
||||
|
||||
const void *
|
||||
hb_itor_key(const hb_itor *itor)
|
||||
{
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
return itor->node ? itor->node->key : NULL;
|
||||
}
|
||||
|
||||
void *
|
||||
hb_itor_data(hb_itor *itor)
|
||||
{
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
return itor->node ? itor->node->dat : NULL;
|
||||
}
|
||||
|
||||
const void *
|
||||
hb_itor_cdata(const hb_itor *itor)
|
||||
{
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
return itor->node ? itor->node->dat : NULL;
|
||||
}
|
||||
|
||||
int
|
||||
hb_itor_set_data(hb_itor *itor, void *dat, int del)
|
||||
{
|
||||
ASSERT(itor != NULL);
|
||||
|
||||
if (itor->node == NULL)
|
||||
return -1;
|
||||
|
||||
if (del && itor->tree->dat_del)
|
||||
itor->tree->dat_del(itor->node->dat);
|
||||
itor->node->dat = dat;
|
||||
return 0;
|
||||
}
|
64
ompi/mca/coll/libnbc/hb_tree.h
Обычный файл
64
ompi/mca/coll/libnbc/hb_tree.h
Обычный файл
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* hb_tree.h
|
||||
*
|
||||
* Interface for height balanced tree.
|
||||
* Copyright (C) 2001 Farooq Mela.
|
||||
*
|
||||
* $Id: hb_tree.h,v 1.2 2001/09/10 06:46:41 farooq Exp $
|
||||
*/
|
||||
|
||||
#ifndef _HB_TREE_H_
|
||||
#define _HB_TREE_H_
|
||||
|
||||
#include "dict.h"
|
||||
|
||||
BEGIN_DECL
|
||||
|
||||
struct hb_tree;
|
||||
typedef struct hb_tree hb_tree;
|
||||
|
||||
hb_tree *hb_tree_new __P((dict_cmp_func key_cmp, dict_del_func key_del,
|
||||
dict_del_func dat_del));
|
||||
dict *hb_dict_new __P((dict_cmp_func key_cmp, dict_del_func key_del,
|
||||
dict_del_func dat_del));
|
||||
void hb_tree_destroy __P((hb_tree *tree, int del));
|
||||
|
||||
int hb_tree_insert __P((hb_tree *tree, void *key, void *dat, int overwrite));
|
||||
int hb_tree_probe __P((hb_tree *tree, void *key, void **dat));
|
||||
void *hb_tree_search __P((hb_tree *tree, const void *key));
|
||||
const void *hb_tree_csearch __P((const hb_tree *tree, const void *key));
|
||||
int hb_tree_remove __P((hb_tree *tree, const void *key, int del));
|
||||
void hb_tree_empty __P((hb_tree *tree, int del));
|
||||
void hb_tree_walk __P((hb_tree *tree, dict_vis_func visit));
|
||||
unsigned hb_tree_count __P((const hb_tree *tree));
|
||||
unsigned hb_tree_height __P((const hb_tree *tree));
|
||||
unsigned hb_tree_mheight __P((const hb_tree *tree));
|
||||
unsigned hb_tree_pathlen __P((const hb_tree *tree));
|
||||
const void *hb_tree_min __P((const hb_tree *tree));
|
||||
const void *hb_tree_max __P((const hb_tree *tree));
|
||||
|
||||
struct hb_itor;
|
||||
typedef struct hb_itor hb_itor;
|
||||
|
||||
hb_itor *hb_itor_new __P((hb_tree *tree));
|
||||
dict_itor *hb_dict_itor_new __P((hb_tree *tree));
|
||||
void hb_itor_destroy __P((hb_itor *tree));
|
||||
|
||||
int hb_itor_valid __P((const hb_itor *itor));
|
||||
void hb_itor_invalidate __P((hb_itor *itor));
|
||||
int hb_itor_next __P((hb_itor *itor));
|
||||
int hb_itor_prev __P((hb_itor *itor));
|
||||
int hb_itor_nextn __P((hb_itor *itor, unsigned count));
|
||||
int hb_itor_prevn __P((hb_itor *itor, unsigned count));
|
||||
int hb_itor_first __P((hb_itor *itor));
|
||||
int hb_itor_last __P((hb_itor *itor));
|
||||
int hb_itor_search __P((hb_itor *itor, const void *key));
|
||||
const void *hb_itor_key __P((const hb_itor *itor));
|
||||
void *hb_itor_data __P((hb_itor *itor));
|
||||
const void *hb_itor_cdata __P((const hb_itor *itor));
|
||||
int hb_itor_set_data __P((hb_itor *itor, void *dat, int del));
|
||||
int hb_itor_remove __P((hb_itor *itor, int del));
|
||||
|
||||
END_DECL
|
||||
|
||||
#endif /* !_HB_TREE_H_ */
|
1142
ompi/mca/coll/libnbc/ib.c
Обычный файл
1142
ompi/mca/coll/libnbc/ib.c
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
119
ompi/mca/coll/libnbc/ib.h
Обычный файл
119
ompi/mca/coll/libnbc/ib.h
Обычный файл
@ -0,0 +1,119 @@
|
||||
#ifndef __IB_H__
|
||||
#define __IB_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <evapi.h>
|
||||
#include <vapi.h>
|
||||
#include <vapi_common.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#include "dict.h"
|
||||
|
||||
#define IB_OK 0
|
||||
#define IB_OOR 1
|
||||
#define IB_CONTINUE 2
|
||||
|
||||
#define IB_RTR_SIZE 320 /* number of RTR buffers per peer */
|
||||
#define IB_EAGER_SIZE 160 /* number of eager buffers per peer */
|
||||
#define IB_EAGER_LIMIT 8247 /* equals 8192 + 64 - 9 (tag, index, size, flag) to ensure 64 byte alignment */
|
||||
|
||||
typedef struct {
|
||||
u_int64_t r_key; /* r_key of peer - TODO: should be u_int32_t ...*/
|
||||
u_int64_t addr; /* addr of peer - TODO: could be u_int32_t ... */
|
||||
} IB_Peer_info;
|
||||
|
||||
typedef struct {
|
||||
u_int64_t r_key; /* r_key of peer - TODO: should be u_int32_t ... */
|
||||
u_int64_t addr; /* addr of peer - TODO: could be u_int32_t ... */
|
||||
int32_t tag; /* tag has to be at the end (we do not send with immediate, so tag indicates receive -> could be dangerous) */
|
||||
} IB_Peer_info_tag;
|
||||
|
||||
typedef struct {
|
||||
VAPI_mr_hndl_t mr_hndl_p; /* memroy region handle - should really be a pointer to a memlist-element ... */
|
||||
VAPI_sg_lst_entry_t sr_sg_lst; /* the IB SG list */
|
||||
VAPI_sr_desc_t sr_desc; /* the IB SR descr. */
|
||||
int p; /* how many procs (RCQ, SCQ, QP) are in this comm */
|
||||
struct struct_IB_Comminfo *comminfo; /* the communicator info struct */
|
||||
int tag; /* our tag */
|
||||
int rank; /* our rank */
|
||||
int peer; /* the peer (dst for send, src for recv */
|
||||
void *buf; /* we need the buf for eager messages on the receiver side */
|
||||
int sendel; /* the element in comminfo.send[element] which is use by this request to send RTR or eager messages - we want to free it after sending RTR/eager */
|
||||
enum {FREE, SEND_WAITING_RTR, SEND_POSTED_SR, SEND_SENT_EAGER, RECV_SENDING_RTR, RECV_SENT_RTR, RECV_WAITING_EAGER, SEND_DONE, RECV_DONE, RECV_EAGER_DONE} status; /* this indicates the operation (send,recv) and the status of this op */
|
||||
} IB_Req;
|
||||
|
||||
typedef IB_Req* IB_Request;
|
||||
|
||||
struct struct_IB_Taglstel {
|
||||
int tag; /* the tag -> key element */
|
||||
IB_Req *req; /* the request having this tag */
|
||||
};
|
||||
typedef struct struct_IB_Taglstel IB_Taglstel;
|
||||
|
||||
struct struct_IB_Memlistel {
|
||||
void *buf;
|
||||
int size;
|
||||
VAPI_mr_hndl_t *mr;
|
||||
VAPI_rkey_t r_key;
|
||||
VAPI_lkey_t l_key;
|
||||
};
|
||||
typedef struct struct_IB_Memlistel IB_Memlistel;
|
||||
|
||||
typedef struct {
|
||||
int16_t index; /* the index in my free-array on the sender side - I should RDMA a -1 into this array to indicate the receiption */
|
||||
int16_t size; /* the actual size of the message, after this size follows a flag (single byte) to poll on receiption */
|
||||
int32_t tag; /* the message tag */
|
||||
int8_t buf[IB_EAGER_LIMIT]; /* the data buffer - should be chosen that the whole structure size is 64 byte aligned */
|
||||
int8_t flag; /* the flag to poll for completion - only if buffer is full */
|
||||
} IB_Eager_data;
|
||||
|
||||
struct struct_IB_Comminfo {
|
||||
VAPI_qp_hndl_t *qp_hndl_arr; /* QPs to all ranks in this comm */
|
||||
VAPI_cq_hndl_t *sr_cq_hndl_arr; /* SR CQs for all ranks in this comm */
|
||||
VAPI_cq_hndl_t *rr_cq_hndl_arr; /* RR CQs for all ranks in this comm */
|
||||
|
||||
/* the old crappy linear taglist */
|
||||
IB_Taglstel *taglisthead; /* the comm specific taglist */
|
||||
IB_Taglstel *taglistend; /* the end pointer of the taglist */
|
||||
|
||||
hb_tree **taglist; /* the new fancy AVL tree taglists - one for each peer (2-d matching is not possible :-(, so the peer-dimension is like a hash-table O(1) :) ) */
|
||||
|
||||
IB_Peer_info_tag *rtr_send; /* send queue for me (only for RTR) - IB_RTR_SIZE elements */
|
||||
VAPI_mr_hndl_t rtr_send_mr_hndl_p; /* memory region handle - needed to free MR ... */
|
||||
VAPI_lkey_t rtr_send_l_key; /* l_key for send */
|
||||
|
||||
VAPI_mr_hndl_t *rtr_mr_hndl_p; /* memory region handles per proc - needed to free MR ... */
|
||||
IB_Peer_info *rtr_info; /* rtr r_key, addr for each host */
|
||||
VAPI_lkey_t *rtr_l_key; /* l_key for rtr region */
|
||||
volatile IB_Peer_info_tag **rtr; /* rtr queue for each host - IB_RTR_SIZE elements */
|
||||
int **rtr_peer_free; /* indicates which elements are free at the specific peer (-1 means free, > -1 means filled with tag x */
|
||||
|
||||
IB_Eager_data *eager_send; /* eager send queue for me (only for eager) - IB_EAGER_SIZE elements */
|
||||
VAPI_mr_hndl_t eager_send_mr_hndl_p; /* memory region handle - needed to free MR ... */
|
||||
VAPI_lkey_t eager_send_l_key; /* l_key for eager send */
|
||||
|
||||
VAPI_mr_hndl_t *eager_mr_hndl_p; /* memory region handles per peer - needed to free MR ... */
|
||||
IB_Peer_info *eager_info; /* rtr r_key, addr for each host */
|
||||
VAPI_lkey_t *eager_l_key; /* l_key for rtr region per peer */
|
||||
volatile IB_Eager_data **eager; /* rtr queue for each host - IB_RTR_SIZE elements */
|
||||
int **eager_peer_free; /* indicates which elements are free at the specific peer (-1 means free, > -1 means filled with tag x */
|
||||
|
||||
int empty; /* this is just a buffer to RDMA into eager_peer_free on the sender */
|
||||
VAPI_mr_hndl_t *eager_free_mr_hndl_p; /* memory region handles per peer - needed to free MR ... */
|
||||
IB_Peer_info *eager_free_info; /* rtr r_key, addr for each host to RDMA EAGER_DONE */
|
||||
VAPI_mr_hndl_t empty_mr_hndl_p; /* memory region handle - needed to free MR ... */
|
||||
VAPI_lkey_t empty_l_key; /* l_key for empty int */
|
||||
};
|
||||
typedef struct struct_IB_Comminfo IB_Comminfo;
|
||||
|
||||
/* function prototypes ... */
|
||||
int IB_Testall(int count, IB_Request *requests, int *flag);
|
||||
int IB_Waitall(int count, IB_Request *requests);
|
||||
int IB_Wait(IB_Request *req);
|
||||
int IB_Test(IB_Request *req);
|
||||
int IB_Isend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm mycomm, IB_Request *request);
|
||||
int IB_Irecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm mycomm, IB_Request *request);
|
||||
|
||||
#endif
|
104
ompi/mca/coll/libnbc/ib_main.c
Обычный файл
104
ompi/mca/coll/libnbc/ib_main.c
Обычный файл
@ -0,0 +1,104 @@
|
||||
#define IB
|
||||
#include <mpi.h>
|
||||
#include "ib.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int rank, res, size, i, loops, j, tag;
|
||||
IB_Request req;
|
||||
double t1=0, t2=0, t3=0, t4=0, t5, t6, t7;
|
||||
MPI_Request mpireq;
|
||||
void *buf2;
|
||||
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
|
||||
size = 1;
|
||||
loops= 50;
|
||||
|
||||
buf2 = malloc(size);
|
||||
if(buf2 == NULL) printf("malloc() error\n");
|
||||
|
||||
if(rank == 0) {
|
||||
res = IB_Isend(buf2, size, MPI_BYTE, 1, 1, MPI_COMM_WORLD, &req);
|
||||
if(res) printf("Error in IB_Send (%i) \n", res);
|
||||
res = IB_Wait(&req);
|
||||
res = IB_Irecv(buf2, size, MPI_BYTE, 1, 1, MPI_COMM_WORLD, &req);
|
||||
if(res) printf("Error in IB_Recv (%i) \n", res);
|
||||
res = IB_Wait(&req);
|
||||
} else {
|
||||
res = IB_Irecv(buf2, size, MPI_BYTE, 0, 1, MPI_COMM_WORLD, &req);
|
||||
if(res) printf("Error in IB_Recv (%i)\n", res);
|
||||
res = IB_Wait(&req);
|
||||
res = IB_Isend(buf2, size, MPI_BYTE, 0, 1, MPI_COMM_WORLD, &req);
|
||||
if(res) printf("Error in IB_Send (%i) \n", res);
|
||||
res = IB_Wait(&req);
|
||||
}
|
||||
|
||||
printf("[%i] MEASUREMENT\n", rank);
|
||||
t1=t2=t3=t4=t5=t6=0;
|
||||
|
||||
|
||||
|
||||
for(j=1; j<loops+1;j++) {
|
||||
i = 0;
|
||||
tag = j;
|
||||
|
||||
t1 -= MPI_Wtime();
|
||||
if(!rank) {
|
||||
t2 -= MPI_Wtime();
|
||||
res = IB_Isend(buf2+i, size-i, MPI_BYTE, 1, tag, MPI_COMM_WORLD, &req);
|
||||
t2 += MPI_Wtime();
|
||||
if(res) printf("Error in IB_Send (%i) \n", res);
|
||||
res = IB_Wait(&req);
|
||||
|
||||
t3 -= MPI_Wtime();
|
||||
res = IB_Irecv(buf2+i, size-i, MPI_BYTE, 1, tag, MPI_COMM_WORLD, &req);
|
||||
t3 += MPI_Wtime();
|
||||
if(res) printf("Error in IB_Recv (%i) \n", res);
|
||||
res = IB_Wait(&req);
|
||||
} else {
|
||||
t3 -= MPI_Wtime();
|
||||
res = IB_Irecv(buf2+i, size-i, MPI_BYTE, 0, tag, MPI_COMM_WORLD, &req);
|
||||
if(res) printf("Error in IB_Recv (%i)\n", res);
|
||||
res = IB_Wait(&req);
|
||||
t3 += MPI_Wtime();
|
||||
|
||||
t2 -= MPI_Wtime();
|
||||
res = IB_Isend(buf2+i, size-i, MPI_BYTE, 0, tag, MPI_COMM_WORLD, &req);
|
||||
t2 += MPI_Wtime();
|
||||
if(res) printf("Error in IB_Send (%i) \n", res);
|
||||
res = IB_Wait(&req);
|
||||
}
|
||||
t1 += MPI_Wtime();
|
||||
|
||||
t4 -= MPI_Wtime();
|
||||
if(!rank) {
|
||||
t5 -= MPI_Wtime();
|
||||
MPI_Isend(buf2, size, MPI_BYTE, 1, 1, MPI_COMM_WORLD, &mpireq);
|
||||
t5 += MPI_Wtime();
|
||||
res = MPI_Wait(&mpireq, MPI_STATUS_IGNORE);
|
||||
|
||||
t6 -= MPI_Wtime();
|
||||
MPI_Irecv(buf2, size, MPI_BYTE, 1, 1, MPI_COMM_WORLD, &mpireq);
|
||||
t6 += MPI_Wtime();
|
||||
res = MPI_Wait(&mpireq, MPI_STATUS_IGNORE);
|
||||
} else {
|
||||
t6 -= MPI_Wtime();
|
||||
MPI_Irecv(buf2, size, MPI_BYTE, 0, 1, MPI_COMM_WORLD, &mpireq);
|
||||
t6 += MPI_Wtime();
|
||||
res = MPI_Wait(&mpireq, MPI_STATUS_IGNORE);
|
||||
|
||||
t5 -= MPI_Wtime();
|
||||
MPI_Isend(buf2, size, MPI_BYTE, 0, 1, MPI_COMM_WORLD, &mpireq);
|
||||
t5 += MPI_Wtime();
|
||||
res = MPI_Wait(&mpireq, MPI_STATUS_IGNORE);
|
||||
}
|
||||
t4 += MPI_Wtime();
|
||||
printf("[%i] %lf (%lf %lf) | %lf (%lf %lf) \n", rank, t1*1e6/j, t2*1e6/j, t3*1e6/j, t4*1e6/j, t5*1e6/j,t6*1e6/j);
|
||||
}
|
||||
free(buf2);
|
||||
|
||||
MPI_Finalize();
|
||||
|
||||
return 0;
|
||||
}
|
294
ompi/mca/coll/libnbc/main.c
Обычный файл
294
ompi/mca/coll/libnbc/main.c
Обычный файл
@ -0,0 +1,294 @@
|
||||
#include "nbc.h"
|
||||
#include <unistd.h>
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
NBC_Handle handle1, handle2;
|
||||
int rank, i,j, res, p;
|
||||
int *buf1, *buf2, *ptr;
|
||||
|
||||
MPI_Init(&argc, &argv);
|
||||
|
||||
/* shut up compiler */
|
||||
handle1 = handle2;
|
||||
handle2 = handle1;
|
||||
|
||||
res = MPI_Comm_size(MPI_COMM_WORLD, &p);
|
||||
res = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
if(res != MPI_SUCCESS) printf("Error in MPI_Comm_rank!\n");
|
||||
|
||||
buf1=malloc(2*p*sizeof(int));
|
||||
buf2=malloc(2*p*sizeof(int));
|
||||
|
||||
for (i=0; i<1; i++) {
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf1+j;
|
||||
*ptr = rank;
|
||||
ptr = buf2+j;
|
||||
*ptr = 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/**************************** BCAST ********************************/
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf1+j;
|
||||
*ptr = rank;
|
||||
ptr = buf2+j;
|
||||
*ptr = 0;
|
||||
}
|
||||
|
||||
printf("#[%i] Bcast before: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf1+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
|
||||
NBC_Ibcast(buf1, p, MPI_INT, 0, MPI_COMM_WORLD, &handle1);
|
||||
NBC_Wait(&handle1);
|
||||
|
||||
printf("#[%i] Bcast after: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf1+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
#if 0
|
||||
/**************************** GATHER ********************************/
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf1+j;
|
||||
*ptr = rank;
|
||||
ptr = buf2+j;
|
||||
*ptr = 0;
|
||||
}
|
||||
|
||||
if(rank ==0) {
|
||||
printf("#[%i] Gather before: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
NBC_Igather(buf1, 1, MPI_INT, buf2, 1, MPI_INT, 0, MPI_COMM_WORLD, &handle1);
|
||||
NBC_Wait(&handle1);
|
||||
|
||||
if(rank ==0) {
|
||||
printf("#[%i] Gather after: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/**************************** SCATTER ********************************/
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf1+j;
|
||||
*ptr = j;
|
||||
ptr = buf2+j;
|
||||
*ptr = 0;
|
||||
}
|
||||
|
||||
printf("#[%i] Scatter before: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
NBC_Iscatter(buf1, 1, MPI_INT, buf2, 1, MPI_INT, 0, MPI_COMM_WORLD, &handle1);
|
||||
NBC_Wait(&handle1);
|
||||
|
||||
printf("#[%i] Scatter after: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
/**************************** ALLGATHER ********************************/
|
||||
printf("#[%i] sbuf: %lu\n", rank, (unsigned long)buf1);
|
||||
printf("#[%i] rbuf: %lu\n", rank, (unsigned long)buf2);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf1+j;
|
||||
*ptr = rank;
|
||||
ptr = buf2+j;
|
||||
*ptr = 0;
|
||||
}
|
||||
*buf1 = rank;
|
||||
|
||||
printf("#[%i] Allgather before: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
NBC_Iallgather(buf1, 1, MPI_INT, buf2, 1, MPI_INT, MPI_COMM_WORLD, &handle1);
|
||||
NBC_Wait(&handle1);
|
||||
|
||||
printf("#[%i] Allgather after: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
/**************************** ALLTOALL ********************************/
|
||||
#if 0
|
||||
printf("#[%i] sbuf: %lu\n", rank, (unsigned long)buf1);
|
||||
printf("#[%i] rbuf: %lu\n", rank, (unsigned long)buf2);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf1+j;
|
||||
*ptr = j;
|
||||
ptr = buf2+j;
|
||||
*ptr = 0;
|
||||
}
|
||||
|
||||
printf("#[%i] Alltoall before: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
NBC_Ialltoall(buf1, 1, MPI_INT, buf2, 1, MPI_INT, MPI_COMM_WORLD, &handle1);
|
||||
NBC_Wait(&handle1);
|
||||
|
||||
printf("#[%i] Alltoall after: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
NBC_Ialltoall(buf1, 1, MPI_INT, buf2, 1, MPI_INT, MPI_COMM_WORLD, &handle1);
|
||||
NBC_Wait(&handle1);
|
||||
#endif
|
||||
#if 0
|
||||
/**************************** REDUCE ********************************/
|
||||
printf("#[%i] sbuf: %lu\n", rank, (unsigned long)buf1);
|
||||
printf("#[%i] rbuf: %lu\n", rank, (unsigned long)buf2);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf1+j;
|
||||
*ptr = 1;
|
||||
ptr = buf2+j;
|
||||
*ptr = 0;
|
||||
}
|
||||
|
||||
printf("#[%i] Reduce before: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
res = NBC_Ireduce(buf1, buf2, p, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD, &handle1);
|
||||
if(res != NBC_OK) { printf("error in NBC_Ireduce(): %i\n", res); }
|
||||
NBC_Wait(&handle1);
|
||||
|
||||
printf("#[%i] Reduce after: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
/**************************** ALLREDUCE ********************************/
|
||||
printf("#[%i] sbuf: %lu\n", rank, (unsigned long)buf1);
|
||||
printf("#[%i] rbuf: %lu\n", rank, (unsigned long)buf2);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf1+j;
|
||||
*ptr = 1;
|
||||
ptr = buf2+j;
|
||||
*ptr = 0;
|
||||
}
|
||||
|
||||
printf("#[%i] Reduce before: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
res = NBC_Iallreduce(buf1, buf2, 2*p, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &handle1);
|
||||
if(res != NBC_OK) { printf("error in NBC_Iallreduce(): %i\n", res); }
|
||||
NBC_Wait(&handle1);
|
||||
|
||||
printf("#[%i] Reduce after: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
#if 0
|
||||
/**************************** SCAN ********************************/
|
||||
printf("#[%i] sbuf: %lu\n", rank, (unsigned long)buf1);
|
||||
printf("#[%i] rbuf: %lu\n", rank, (unsigned long)buf2);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf1+j;
|
||||
*ptr = 1;
|
||||
ptr = buf2+j;
|
||||
*ptr = 0;
|
||||
}
|
||||
|
||||
printf("#[%i] Scan before: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
res = NBC_Iscan(buf1, buf2, p, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &handle1);
|
||||
if(res != NBC_OK) { printf("error in NBC_Iscan(): %i\n", res); }
|
||||
NBC_Wait(&handle1);
|
||||
|
||||
printf("#[%i] Scan after: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
/**************************** REDUCE_SCATTER ********************************/
|
||||
printf("#[%i] sbuf: %lu\n", rank, (unsigned long)buf1);
|
||||
printf("#[%i] rbuf: %lu\n", rank, (unsigned long)buf2);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf1+j;
|
||||
*ptr = j;
|
||||
ptr = buf2+j;
|
||||
*ptr = 0;
|
||||
}
|
||||
|
||||
printf("#[%i] reduce_scatter before: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
recvcounts=malloc(p*sizeof(int));
|
||||
for(j=0;j<p;j++) recvcounts[j] = 1;
|
||||
res = NBC_Ireduce_scatter(buf1, buf2, recvcounts, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &handle1);
|
||||
if(res != NBC_OK) { printf("error in NBC_Ireduce_scatter(): %i\n", res); }
|
||||
NBC_Wait(&handle1);
|
||||
free(recvcounts);
|
||||
|
||||
printf("#[%i] reduce_scatter after: ", rank);
|
||||
for(j=0;j<p;j++) {
|
||||
ptr = buf2+j;
|
||||
printf("%i ", *ptr);
|
||||
}
|
||||
printf("\n");
|
||||
/**************************** END ********************************/
|
||||
#endif
|
||||
}
|
||||
|
||||
free(buf1);
|
||||
free(buf2);
|
||||
|
||||
MPI_Finalize();
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
752
ompi/mca/coll/libnbc/nbc.c
Обычный файл
752
ompi/mca/coll/libnbc/nbc.c
Обычный файл
@ -0,0 +1,752 @@
|
||||
#include "nbc.h"
|
||||
|
||||
/* only used in this file */
|
||||
static __inline__ int NBC_Start_round(NBC_Handle *handle);
|
||||
|
||||
#ifndef OMPI_COMPONENT
|
||||
/* the keyval (global) */
|
||||
static int gkeyval=MPI_KEYVAL_INVALID;
|
||||
#endif
|
||||
|
||||
#ifndef OMPI_COMPONENT
|
||||
static int NBC_Key_copy(MPI_Comm oldcomm, int keyval, void *extra_state, void *attribute_val_in, void *attribute_val_out, int *flag) {
|
||||
/* delete the attribute in the new comm - it will be created at the
|
||||
* first usage */
|
||||
*flag = 0;
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int NBC_Key_delete(MPI_Comm comm, int keyval, void *attribute_val, void *extra_state) {
|
||||
NBC_Comminfo *comminfo;
|
||||
|
||||
if(keyval == gkeyval) {
|
||||
comminfo=(NBC_Comminfo*)attribute_val;
|
||||
free(comminfo);
|
||||
} else {
|
||||
printf("Got wrong keyval!(%i)\n", keyval);
|
||||
}
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* allocates a new schedule array */
|
||||
int NBC_Sched_create(NBC_Schedule* schedule) {
|
||||
|
||||
*schedule=malloc(2*sizeof(int));
|
||||
if(*schedule == NULL) { return NBC_OOR; }
|
||||
*(int*)*schedule=2*sizeof(int);
|
||||
*(((int*)*schedule)+1)=0;
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* frees a schedule array */
|
||||
int NBC_Free(NBC_Handle* handle) {
|
||||
|
||||
/*free(*handle->schedule);*/
|
||||
/* the schedule pointer itself is also malloc'd */
|
||||
/*free(handle->schedule);*/
|
||||
/* if the nbc_I<collective> attached some data */
|
||||
if(NULL != handle->tmpbuf) free(handle->tmpbuf);
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* this function puts a send into the schedule */
|
||||
int NBC_Sched_send(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule) {
|
||||
int size;
|
||||
NBC_Args_send* send_args;
|
||||
|
||||
/* get size of actual schedule */
|
||||
NBC_GET_SIZE(*schedule, size);
|
||||
/*printf("schedule is %i bytes\n", size);*/
|
||||
*schedule = realloc(*schedule, size+sizeof(NBC_Args_send)+sizeof(NBC_Fn_type));
|
||||
if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; }
|
||||
|
||||
/* adjust the function type */
|
||||
*(NBC_Fn_type*)((char*)*schedule+size)=SEND;
|
||||
|
||||
/* store the passed arguments */
|
||||
send_args = (NBC_Args_send*)((char*)*schedule+size+sizeof(NBC_Fn_type));
|
||||
send_args->buf=buf;
|
||||
send_args->tmpbuf=tmpbuf;
|
||||
send_args->count=count;
|
||||
send_args->datatype=datatype;
|
||||
send_args->dest=dest;
|
||||
|
||||
/* increase number of elements in schedule */
|
||||
NBC_INC_NUM_ROUND(*schedule);
|
||||
NBC_DEBUG(10, "adding send - ends at byte %i\n", (int)(size+sizeof(NBC_Args_send)+sizeof(NBC_Fn_type)));
|
||||
|
||||
/* increase size of schedule */
|
||||
NBC_INC_SIZE(*schedule, sizeof(NBC_Args_send)+sizeof(NBC_Fn_type));
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* this function puts a receive into the schedule */
|
||||
int NBC_Sched_recv(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule) {
|
||||
int size;
|
||||
NBC_Args_recv* recv_args;
|
||||
|
||||
/* get size of actual schedule */
|
||||
NBC_GET_SIZE(*schedule, size);
|
||||
/*printf("schedule is %i bytes\n", size);*/
|
||||
*schedule = realloc(*schedule, size+sizeof(NBC_Args_recv)+sizeof(NBC_Fn_type));
|
||||
if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; }
|
||||
|
||||
/* adjust the function type */
|
||||
*(NBC_Fn_type*)((char*)*schedule+size)=RECV;
|
||||
|
||||
/* store the passed arguments */
|
||||
recv_args=(NBC_Args_recv*)((char*)*schedule+size+sizeof(NBC_Fn_type));
|
||||
recv_args->buf=buf;
|
||||
recv_args->tmpbuf=tmpbuf;
|
||||
recv_args->count=count;
|
||||
recv_args->datatype=datatype;
|
||||
recv_args->source=source;
|
||||
|
||||
/* increase number of elements in schedule */
|
||||
NBC_INC_NUM_ROUND(*schedule);
|
||||
NBC_DEBUG(10, "adding receive - ends at byte %i\n", (int)(size+sizeof(NBC_Args_recv)+sizeof(NBC_Fn_type)));
|
||||
|
||||
/* increase size of schedule */
|
||||
NBC_INC_SIZE(*schedule, sizeof(NBC_Args_recv)+sizeof(NBC_Fn_type));
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* this function puts an operation into the schedule */
|
||||
int NBC_Sched_op(void *buf3, char tmpbuf3, void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule) {
|
||||
int size;
|
||||
NBC_Args_op* op_args;
|
||||
|
||||
/* get size of actual schedule */
|
||||
NBC_GET_SIZE(*schedule, size);
|
||||
/*printf("schedule is %i bytes\n", size);*/
|
||||
*schedule = realloc(*schedule, size+sizeof(NBC_Args_op)+sizeof(NBC_Fn_type));
|
||||
if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; }
|
||||
|
||||
/* adjust the function type */
|
||||
*(NBC_Fn_type*)((char*)*schedule+size)=OP;
|
||||
|
||||
/* store the passed arguments */
|
||||
op_args=(NBC_Args_op*)((char*)*schedule+size+sizeof(NBC_Fn_type));
|
||||
op_args->buf1=buf1;
|
||||
op_args->buf2=buf2;
|
||||
op_args->buf3=buf3;
|
||||
op_args->tmpbuf1=tmpbuf1;
|
||||
op_args->tmpbuf2=tmpbuf2;
|
||||
op_args->tmpbuf3=tmpbuf3;
|
||||
op_args->count=count;
|
||||
op_args->op=op;
|
||||
op_args->datatype=datatype;
|
||||
|
||||
/* increase number of elements in schedule */
|
||||
NBC_INC_NUM_ROUND(*schedule);
|
||||
NBC_DEBUG(10, "adding op - ends at byte %i\n", (int)(size+sizeof(NBC_Args_op)+sizeof(NBC_Fn_type)));
|
||||
|
||||
/* increase size of schedule */
|
||||
NBC_INC_SIZE(*schedule, sizeof(NBC_Args_op)+sizeof(NBC_Fn_type));
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* this function puts a copy into the schedule */
|
||||
int NBC_Sched_copy(void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, MPI_Datatype tgttype, NBC_Schedule *schedule) {
|
||||
int size;
|
||||
NBC_Args_copy* copy_args;
|
||||
|
||||
/* get size of actual schedule */
|
||||
NBC_GET_SIZE(*schedule, size);
|
||||
/*printf("schedule is %i bytes\n", size);*/
|
||||
*schedule = realloc(*schedule, size+sizeof(NBC_Args_copy)+sizeof(NBC_Fn_type));
|
||||
if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; }
|
||||
|
||||
/* adjust the function type */
|
||||
*(NBC_Fn_type*)((char*)*schedule+size)=COPY;
|
||||
|
||||
/* store the passed arguments */
|
||||
copy_args = (NBC_Args_copy*)((char*)*schedule+size+sizeof(NBC_Fn_type));
|
||||
copy_args->src=src;
|
||||
copy_args->tmpsrc=tmpsrc;
|
||||
copy_args->srccount=srccount;
|
||||
copy_args->srctype=srctype;
|
||||
copy_args->tgt=tgt;
|
||||
copy_args->tmptgt=tmptgt;
|
||||
copy_args->tgtcount=tgtcount;
|
||||
copy_args->tgttype=tgttype;
|
||||
|
||||
/* increase number of elements in schedule */
|
||||
NBC_INC_NUM_ROUND(*schedule);
|
||||
NBC_DEBUG(10, "adding copy - ends at byte %i\n", (int)(size+sizeof(NBC_Args_copy)+sizeof(NBC_Fn_type)));
|
||||
|
||||
/* increase size of schedule */
|
||||
NBC_INC_SIZE(*schedule, sizeof(NBC_Args_copy)+sizeof(NBC_Fn_type));
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* this function puts a unpack into the schedule */
|
||||
int NBC_Sched_unpack(void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, NBC_Schedule *schedule) {
|
||||
int size;
|
||||
NBC_Args_unpack* unpack_args;
|
||||
|
||||
/* get size of actual schedule */
|
||||
NBC_GET_SIZE(*schedule, size);
|
||||
/*printf("schedule is %i bytes\n", size);*/
|
||||
*schedule = realloc(*schedule, size+sizeof(NBC_Args_unpack)+sizeof(NBC_Fn_type));
|
||||
if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; }
|
||||
|
||||
/* adjust the function type */
|
||||
*(NBC_Fn_type*)((char*)*schedule+size)=UNPACK;
|
||||
|
||||
/* store the passed arguments */
|
||||
unpack_args = (NBC_Args_unpack*)((char*)*schedule+size+sizeof(NBC_Fn_type));
|
||||
unpack_args->inbuf=inbuf;
|
||||
unpack_args->tmpinbuf=tmpinbuf;
|
||||
unpack_args->count=count;
|
||||
unpack_args->datatype=datatype;
|
||||
unpack_args->outbuf=outbuf;
|
||||
unpack_args->tmpoutbuf=tmpoutbuf;
|
||||
|
||||
/* increase number of elements in schedule */
|
||||
NBC_INC_NUM_ROUND(*schedule);
|
||||
NBC_DEBUG(10, "adding unpack - ends at byte %i\n", (int)(size+sizeof(NBC_Args_unpack)+sizeof(NBC_Fn_type)));
|
||||
|
||||
/* increase size of schedule */
|
||||
NBC_INC_SIZE(*schedule, sizeof(NBC_Args_unpack)+sizeof(NBC_Fn_type));
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* this function ends a round of a schedule */
|
||||
int NBC_Sched_barrier(NBC_Schedule *schedule) {
|
||||
int size;
|
||||
|
||||
/* get size of actual schedule */
|
||||
NBC_GET_SIZE(*schedule, size);
|
||||
/*printf("round terminated at %i bytes\n", size);*/
|
||||
*schedule = realloc(*schedule, size+sizeof(char)+sizeof(int));
|
||||
if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; }
|
||||
|
||||
/* add the barrier char (1) because another round follows */
|
||||
*(char*)((char*)*schedule+size)=1;
|
||||
|
||||
/* set round count elements = 0 for new round */
|
||||
*(int*)((char*)*schedule+size+sizeof(char))=0;
|
||||
NBC_DEBUG(10, "ending round at byte %i\n", (int)(size+sizeof(char)+sizeof(int)));
|
||||
|
||||
/* increase size of schedule */
|
||||
NBC_INC_SIZE(*schedule, sizeof(char)+sizeof(int));
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* this function ends a schedule */
|
||||
int NBC_Sched_commit(NBC_Schedule *schedule) {
|
||||
int size;
|
||||
|
||||
/* get size of actual schedule */
|
||||
NBC_GET_SIZE(*schedule, size);
|
||||
/*printf("schedule terminated at %i bytes\n", size);*/
|
||||
*schedule = realloc(*schedule, size+sizeof(char));
|
||||
if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; }
|
||||
|
||||
/* add the barrier char (0) because this is the last round */
|
||||
*(char*)((char*)*schedule+size)=0;
|
||||
NBC_DEBUG(10, "closing schedule %p at byte %i\n", *schedule, (int)(size+sizeof(char)));
|
||||
|
||||
/* increase size of schedule */
|
||||
NBC_INC_SIZE(*schedule, sizeof(char));
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
int NBC_Progress(NBC_Handle *handle) {
|
||||
int flag, res;
|
||||
long size;
|
||||
char *delim;
|
||||
|
||||
if((handle->req_count > 0) && (handle->req_array != NULL)) {
|
||||
NBC_DEBUG(50, "NBC_Progress: testing for %i requests\n", handle->req_count);
|
||||
#ifdef OMPI_COMPONENT
|
||||
/*res = ompi_request_test_all(handle->req_count, handle->req_array, &flag, MPI_STATUSES_IGNORE);*/
|
||||
res = MPI_Testall(handle->req_count, handle->req_array, &flag, MPI_STATUS_IGNORE);
|
||||
if(res != OMPI_SUCCESS) { printf("MPI Error in MPI_Testall() (%i)\n", res); return res; }
|
||||
#endif
|
||||
#ifdef MPI
|
||||
res = MPI_Testall(handle->req_count, handle->req_array, &flag, MPI_STATUS_IGNORE);
|
||||
if(res != MPI_SUCCESS) { printf("MPI Error in MPI_Testall() (%i)\n", res); return res; }
|
||||
#endif
|
||||
#ifdef IB
|
||||
res = IB_Testall(handle->req_count, handle->req_array, &flag);
|
||||
if(res != MPI_SUCCESS) { printf("MPI Error in MPI_Testall() (%i)\n", res); return res; }
|
||||
#endif
|
||||
} else {
|
||||
flag = 1; /* we had no open requests -> proceed to next round */
|
||||
}
|
||||
|
||||
/* a round is finished */
|
||||
if(flag) {
|
||||
/* adjust delim to start of current round */
|
||||
NBC_DEBUG(10, "NBC_Progress: going in schedule %p to row-offset: %li\n", *handle->schedule, handle->row_offset);
|
||||
delim = (char*)*handle->schedule + handle->row_offset;
|
||||
NBC_DEBUG(10, "delim: %p\n", delim);
|
||||
NBC_GET_ROUND_SIZE(delim, size);
|
||||
NBC_DEBUG(10, "size: %li\n", size);
|
||||
/* adjust delim to end of current round -> delimiter */
|
||||
delim = delim + size;
|
||||
|
||||
if(handle->req_array != NULL) {
|
||||
/* free request array */
|
||||
free(handle->req_array);
|
||||
handle->req_array = NULL;
|
||||
}
|
||||
handle->req_count = 0;
|
||||
|
||||
if(*delim == 0) {
|
||||
/* this was the last round - we're done */
|
||||
NBC_DEBUG(5, "NBC_Progress last round finished - we're done\n");
|
||||
return NBC_OK;
|
||||
} else {
|
||||
NBC_DEBUG(5, "NBC_Progress round finished - goto next round\n");
|
||||
/* move delim to start of next round */
|
||||
delim = delim+1;
|
||||
/* initializing handle for new virgin round */
|
||||
handle->row_offset = (long)delim - (long)*handle->schedule;
|
||||
/* kick it off */
|
||||
res = NBC_Start_round(handle);
|
||||
if(NBC_OK != res) { printf("Error in NBC_Start_round() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
|
||||
return NBC_CONTINUE;
|
||||
}
|
||||
|
||||
int NBC_Progress_block(NBC_Handle *handle) {
|
||||
int res;
|
||||
long size;
|
||||
char *delim;
|
||||
|
||||
do {
|
||||
if((handle->req_count > 0) && (handle->req_array != NULL)) {
|
||||
NBC_DEBUG(50, "NBC_Progress_block: waiting for %i requests\n", handle->req_count);
|
||||
#ifdef OMPI_COMPONENT
|
||||
/*res = ompi_request_wait_all(handle->req_count, handle->req_array, MPI_STATUSES_IGNORE);*/
|
||||
res = MPI_Waitall(handle->req_count, handle->req_array, MPI_STATUS_IGNORE);
|
||||
if(res != OMPI_SUCCESS) { printf("MPI Error in MPI_Waitall() (%i)\n", res); return res; }
|
||||
#endif
|
||||
#ifdef MPI
|
||||
res = MPI_Waitall(handle->req_count, handle->req_array, MPI_STATUS_IGNORE);
|
||||
if(res != MPI_SUCCESS) { printf("MPI Error in MPI_Waitall() (%i)\n", res); return res; }
|
||||
#endif
|
||||
#ifdef IB
|
||||
res = IB_Waitall(handle->req_count, handle->req_array);
|
||||
if(res != NBC_OK) { printf("MPI Error in MPI_Waitall() (%i)\n", res); return res; }
|
||||
#endif
|
||||
}
|
||||
|
||||
/* a round is finished - adjust delim to start of current round */
|
||||
NBC_DEBUG(10, "NBC_Progress_block: going in schedule %p to row-offset: %li\n", *handle->schedule, handle->row_offset);
|
||||
delim = (char*)*handle->schedule + handle->row_offset;
|
||||
NBC_DEBUG(10, "delim: %p\n", delim);
|
||||
NBC_GET_ROUND_SIZE(delim, size);
|
||||
NBC_DEBUG(10, "size: %li\n", size);
|
||||
/* adjust delim to end of current round -> delimiter */
|
||||
delim = delim + size;
|
||||
|
||||
if(handle->req_array != NULL) {
|
||||
/* free request array */
|
||||
free(handle->req_array);
|
||||
handle->req_array = NULL;
|
||||
}
|
||||
handle->req_count = 0;
|
||||
|
||||
if(*delim == 0) {
|
||||
/* this was the last round - we're done */
|
||||
NBC_DEBUG(5, "NBC_Progress_block: last round finished - we're done\n");
|
||||
break;
|
||||
} else {
|
||||
NBC_DEBUG(5, "NBC_Progress_block: round finished - goto next round\n");
|
||||
/* move delim to start of next round */
|
||||
delim = delim+1;
|
||||
/* initializing handle for new virgin round */
|
||||
handle->row_offset = (long)delim - (long)*handle->schedule;
|
||||
/* kick it off */
|
||||
res = NBC_Start_round(handle);
|
||||
if(NBC_OK != res) { printf("Error in NBC_Start_round() (%i)\n", res); return res; }
|
||||
}
|
||||
} while(1);
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
static __inline__ int NBC_Start_round(NBC_Handle *handle) {
|
||||
int *numptr; /* number of operations */
|
||||
int i, res;
|
||||
NBC_Fn_type *typeptr;
|
||||
NBC_Args_send *sendargs;
|
||||
NBC_Args_recv *recvargs;
|
||||
NBC_Args_op *opargs;
|
||||
NBC_Args_copy *copyargs;
|
||||
NBC_Args_unpack *unpackargs;
|
||||
NBC_Schedule myschedule;
|
||||
void *buf1, *buf2, *buf3;
|
||||
|
||||
/* get schedule address */
|
||||
myschedule = (NBC_Schedule*)((char*)*handle->schedule + handle->row_offset);
|
||||
|
||||
numptr = (int*)myschedule;
|
||||
NBC_DEBUG(10, "start_round round at address %p : posting %i operations\n", myschedule, *numptr);
|
||||
|
||||
/* typeptr is increased by sizeof(int) bytes to point to type */
|
||||
typeptr = (NBC_Fn_type*)(numptr+1);
|
||||
for (i=0; i<*numptr; i++) {
|
||||
/* go sizeof op-data forward */
|
||||
switch(*typeptr) {
|
||||
case SEND:
|
||||
NBC_DEBUG(5," SEND (offset %li) ", (long)typeptr-(long)myschedule);
|
||||
sendargs = (NBC_Args_send*)(typeptr+1);
|
||||
NBC_DEBUG(5,"*buf: %p, count: %i, type: %lu, dest: %i, tag: %i)\n", sendargs->buf, sendargs->count, (unsigned long)sendargs->datatype, sendargs->dest, handle->tag);
|
||||
typeptr = (NBC_Fn_type*)(((NBC_Args_send*)typeptr)+1);
|
||||
/* get an additional request - TODO: req_count NOT thread safe */
|
||||
handle->req_count++;
|
||||
/* get buffer */
|
||||
if(sendargs->tmpbuf)
|
||||
buf1=(char*)handle->tmpbuf+(long)sendargs->buf;
|
||||
else
|
||||
buf1=sendargs->buf;
|
||||
#ifdef OMPI_COMPONENT
|
||||
handle->req_array = realloc(handle->req_array, (handle->req_count)*sizeof(ompi_request_t*));
|
||||
CHECK_NULL(handle->req_array);
|
||||
/*res = MCA_PML_CALL(isend_init(buf1, sendargs->count, sendargs->datatype, sendargs->dest, handle->tag, MCA_PML_BASE_SEND_STANDARD, handle->mycomm, handle->req_array+handle->req_count-1));
|
||||
printf("MPI_Isend(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, sendargs->count, (unsigned long)sendargs->datatype, sendargs->dest, handle->tag, (unsigned long)handle->mycomm, res);*/
|
||||
res = MPI_Isend(buf1, sendargs->count, sendargs->datatype, sendargs->dest, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1);
|
||||
if(OMPI_SUCCESS != res) { printf("Error in MPI_Isend(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, sendargs->count, (unsigned long)sendargs->datatype, sendargs->dest, handle->tag, (unsigned long)handle->mycomm, res); return res; }
|
||||
#endif
|
||||
#ifdef MPI
|
||||
handle->req_array = realloc(handle->req_array, (handle->req_count)*sizeof(MPI_Request));
|
||||
CHECK_NULL(handle->req_array);
|
||||
res = MPI_Isend(buf1, sendargs->count, sendargs->datatype, sendargs->dest, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1);
|
||||
if(MPI_SUCCESS != res) { printf("Error in MPI_Isend(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, sendargs->count, (unsigned long)sendargs->datatype, sendargs->dest, handle->tag, (unsigned long)handle->mycomm, res); return res; }
|
||||
#endif
|
||||
#ifdef IB
|
||||
handle->req_array = realloc(handle->req_array, (handle->req_count)*sizeof(IB_Request));
|
||||
CHECK_NULL(handle->req_array);
|
||||
res = IB_Isend(buf1, sendargs->count, sendargs->datatype, sendargs->dest, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1);
|
||||
if(NBC_OK != res) { printf("Error in IB_Isend() (%i)\n", res); return res; }
|
||||
#endif
|
||||
break;
|
||||
case RECV:
|
||||
NBC_DEBUG(5, " RECV (offset %li) ", (long)typeptr-(long)myschedule);
|
||||
recvargs = (NBC_Args_recv*)(typeptr+1);
|
||||
NBC_DEBUG(5, "*buf: %p, count: %i, type: %lu, source: %i, tag: %i)\n", recvargs->buf, recvargs->count, (unsigned long)recvargs->datatype, recvargs->source, handle->tag);
|
||||
typeptr = (NBC_Fn_type*)(((NBC_Args_recv*)typeptr)+1);
|
||||
/* get an additional request - TODO: req_count NOT thread safe */
|
||||
handle->req_count++;
|
||||
/* get buffer */
|
||||
if(recvargs->tmpbuf) {
|
||||
buf1=(char*)handle->tmpbuf+(long)recvargs->buf;
|
||||
} else {
|
||||
buf1=recvargs->buf;
|
||||
}
|
||||
#ifdef OMPI_COMPONENT
|
||||
handle->req_array = realloc(handle->req_array, (handle->req_count)*sizeof(ompi_request_t*));
|
||||
CHECK_NULL(handle->req_array);
|
||||
/*res = MCA_PML_CALL(irecv(buf1, recvargs->count, recvargs->datatype, recvargs->source, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1));
|
||||
printf("MPI_Irecv(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, recvargs->count, (unsigned long)recvargs->datatype, recvargs->source, handle->tag, (unsigned long)handle->mycomm, res); */
|
||||
res = MPI_Irecv(buf1, recvargs->count, recvargs->datatype, recvargs->source, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1);
|
||||
if(OMPI_SUCCESS != res) { printf("Error in MPI_Irecv(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, recvargs->count, (unsigned long)recvargs->datatype, recvargs->source, handle->tag, (unsigned long)handle->mycomm, res); return res; }
|
||||
#endif
|
||||
#ifdef MPI
|
||||
handle->req_array = realloc(handle->req_array, (handle->req_count)*sizeof(MPI_Request));
|
||||
CHECK_NULL(handle->req_array);
|
||||
res = MPI_Irecv(buf1, recvargs->count, recvargs->datatype, recvargs->source, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1);
|
||||
if(MPI_SUCCESS != res) { printf("Error in MPI_Irecv(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, recvargs->count, (unsigned long)recvargs->datatype, recvargs->source, handle->tag, (unsigned long)handle->mycomm, res); return res; }
|
||||
#endif
|
||||
#ifdef IB
|
||||
handle->req_array = realloc(handle->req_array, (handle->req_count)*sizeof(IB_Request));
|
||||
CHECK_NULL(handle->req_array);
|
||||
res = IB_Irecv(buf1, recvargs->count, recvargs->datatype, recvargs->source, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1);
|
||||
if(NBC_OK != res) { printf("Error in MPI_Irecv() (%i)\n", res); return res; }
|
||||
#endif
|
||||
break;
|
||||
case OP:
|
||||
NBC_DEBUG(5, " OP (offset %li) ", (long)typeptr-(long)myschedule);
|
||||
opargs = (NBC_Args_op*)(typeptr+1);
|
||||
NBC_DEBUG(5, "*buf1: %lu, buf2: %lu, count: %i, type: %lu)\n", (unsigned long)opargs->buf1, (unsigned long)opargs->buf2, opargs->count, (unsigned long)opargs->datatype);
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_op*)typeptr+1);
|
||||
/* get buffers */
|
||||
if(opargs->tmpbuf1)
|
||||
buf1=(char*)handle->tmpbuf+(long)opargs->buf1;
|
||||
else
|
||||
buf1=opargs->buf1;
|
||||
if(opargs->tmpbuf2)
|
||||
buf2=(char*)handle->tmpbuf+(long)opargs->buf2;
|
||||
else
|
||||
buf2=opargs->buf2;
|
||||
if(opargs->tmpbuf3)
|
||||
buf3=(char*)handle->tmpbuf+(long)opargs->buf3;
|
||||
else
|
||||
buf3=opargs->buf3;
|
||||
res = NBC_Operation(buf3, buf1, buf2, opargs->op, opargs->datatype, opargs->count);
|
||||
if(res != NBC_OK) { printf("NBC_Operation() failed (code: %i)\n", res); return res; }
|
||||
break;
|
||||
case COPY:
|
||||
NBC_DEBUG(5, " COPY (offset %li) ", (long)typeptr-(long)myschedule);
|
||||
copyargs = (NBC_Args_copy*)(typeptr+1);
|
||||
NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu, tgtcount: %i, tgttype: %lu)\n", (unsigned long)copyargs->src, copyargs->srccount, (unsigned long)copyargs->srctype, (unsigned long)copyargs->tgt, copyargs->tgtcount, (unsigned long)copyargs->tgttype);
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_copy*)typeptr+1);
|
||||
/* get buffers */
|
||||
if(copyargs->tmpsrc)
|
||||
buf1=(char*)handle->tmpbuf+(long)copyargs->src;
|
||||
else
|
||||
buf1=copyargs->src;
|
||||
if(copyargs->tmptgt)
|
||||
buf2=(char*)handle->tmpbuf+(long)copyargs->tgt;
|
||||
else
|
||||
buf2=copyargs->tgt;
|
||||
res = NBC_Copy(buf1, copyargs->srccount, copyargs->srctype, buf2, copyargs->tgtcount, copyargs->tgttype, handle->mycomm);
|
||||
if(res != NBC_OK) { printf("NBC_Copy() failed (code: %i)\n", res); return res; }
|
||||
break;
|
||||
case UNPACK:
|
||||
NBC_DEBUG(5, " UNPACK (offset %li) ", (long)typeptr-(long)myschedule);
|
||||
unpackargs = (NBC_Args_unpack*)(typeptr+1);
|
||||
NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu\n", (unsigned long)unpackargs->inbuf, unpackargs->count, (unsigned long)unpackargs->datatype, (unsigned long)unpackargs->outbuf);
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_unpack*)typeptr+1);
|
||||
/* get buffers */
|
||||
if(unpackargs->tmpinbuf)
|
||||
buf1=(char*)handle->tmpbuf+(long)unpackargs->inbuf;
|
||||
else
|
||||
buf1=unpackargs->outbuf;
|
||||
if(unpackargs->tmpoutbuf)
|
||||
buf2=(char*)handle->tmpbuf+(long)unpackargs->outbuf;
|
||||
else
|
||||
buf2=unpackargs->outbuf;
|
||||
res = NBC_Unpack(buf1, unpackargs->count, unpackargs->datatype, buf2, handle->mycomm);
|
||||
if(res != NBC_OK) { printf("NBC_Unpack() failed (code: %i)\n", res); return res; }
|
||||
break;
|
||||
default:
|
||||
printf("NBC_Start_round: bad type %li at offset %li\n", (long)*typeptr, (long)typeptr-(long)myschedule);
|
||||
return NBC_BAD_SCHED;
|
||||
}
|
||||
/* increase ptr by size of fn_type enum */
|
||||
typeptr = (NBC_Fn_type*)((NBC_Fn_type*)typeptr+1);
|
||||
}
|
||||
|
||||
/* check if we can make progress - not in the first round, this allows us to leave the
|
||||
* initialization faster and to reach more overlap */
|
||||
if(handle->row_offset != 4) {
|
||||
res = NBC_Progress(handle);
|
||||
if((NBC_OK != res) && (NBC_CONTINUE != res)) { printf("Error in NBC_Progress() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
__inline__ int NBC_Init_handle(NBC_Handle *handle, MPI_Comm comm) {
|
||||
int res, flag;
|
||||
NBC_Comminfo *comminfo;
|
||||
|
||||
/* create a new state and return handle to it */
|
||||
handle->req_array = NULL;
|
||||
handle->req_count = 0;
|
||||
handle->comm = comm;
|
||||
/* first int is the schedule size */
|
||||
handle->row_offset = sizeof(int);
|
||||
|
||||
/******************** Do the tag and shadow comm administration ... ***************/
|
||||
|
||||
#ifdef OMPI_COMPONENT
|
||||
/* the communicator is an ompi_communicator_t with a pointer to
|
||||
* c_coll_selected_data where we have our comminfo struct - this was
|
||||
* initialized during comm_query */
|
||||
{
|
||||
ompi_communicator_t *ompicomm;
|
||||
|
||||
ompicomm = (ompi_communicator_t*)comm;
|
||||
comminfo = (NBC_Comminfo*)ompicomm->c_coll_selected_data;
|
||||
flag = 1;
|
||||
}
|
||||
#else
|
||||
/* otherwise we have to do the normal attribute stuff :-( */
|
||||
/* keyval is not initialized yet, we have to init it */
|
||||
if(MPI_KEYVAL_INVALID == gkeyval) {
|
||||
res = MPI_Keyval_create(NBC_Key_copy, NBC_Key_delete, &(gkeyval), NULL);
|
||||
if((MPI_SUCCESS != res)) { printf("Error in MPI_Keyval_create() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
res = MPI_Attr_get(comm, gkeyval, &comminfo, &flag);
|
||||
if((MPI_SUCCESS != res)) { printf("Error in MPI_Attr_get() (%i)\n", res); return res; }
|
||||
#endif
|
||||
if (flag) {
|
||||
/* we found it */
|
||||
comminfo->tag++;
|
||||
} else {
|
||||
/* we have to create a new one */
|
||||
comminfo = NBC_Init_comm(comm);
|
||||
if(comminfo == NULL) { printf("Error in NBC_Init_comm() %i\n", res); return NBC_OOR; }
|
||||
}
|
||||
handle->tag=comminfo->tag;
|
||||
handle->mycomm=comminfo->mycomm;
|
||||
/*printf("got comminfo: %lu tag: %i\n", comminfo, comminfo->tag);*/
|
||||
|
||||
#if 0
|
||||
/*#ifdef OMPI_COMPONENT*/
|
||||
/* we use negative tags for OMPI */
|
||||
if(handle->tag == -50) {
|
||||
handle->tag=-32767;
|
||||
comminfo->tag=-32767;
|
||||
NBC_DEBUG(2,"resetting tags ...\n");
|
||||
}
|
||||
#else
|
||||
#endif
|
||||
/* reset counter ... */
|
||||
if(handle->tag == 32767) {
|
||||
handle->tag=1;
|
||||
comminfo->tag=1;
|
||||
NBC_DEBUG(2,"resetting tags ...\n");
|
||||
}
|
||||
/*#endif*/
|
||||
|
||||
/******************** end of tag and shadow comm administration ... ***************/
|
||||
handle->comminfo = comminfo;
|
||||
|
||||
NBC_DEBUG(3, "got tag %i\n", handle->tag);
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
__inline__ NBC_Comminfo* NBC_Init_comm(MPI_Comm comm) {
|
||||
#ifndef OMPI_COMPONENT
|
||||
int res;
|
||||
#endif
|
||||
NBC_Comminfo *comminfo;
|
||||
|
||||
comminfo = malloc(sizeof(NBC_Comminfo));
|
||||
if(comminfo == NULL) { printf("Error in malloc()\n"); return NULL; }
|
||||
|
||||
|
||||
#ifdef OMPI_COMPONENT
|
||||
comminfo->mycomm = comm;
|
||||
/* set tag to 1 */
|
||||
/*comminfo->tag=-32767;*/
|
||||
comminfo->tag=1;
|
||||
#else
|
||||
/* set tag to 1 */
|
||||
comminfo->tag=1;
|
||||
/* dup and save shadow communicator */
|
||||
res = MPI_Comm_dup(comm, &(comminfo->mycomm));
|
||||
if((MPI_SUCCESS != res)) { printf("Error in MPI_Comm_dup() (%i)\n", res); return NULL; }
|
||||
NBC_DEBUG(1, "created a shadow communicator for %lu ... %lu\n", (unsigned long)comm, (unsigned long)comminfo->mycomm);
|
||||
#endif
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* initialize the NBC_ALLTOALL SchedCache tree */
|
||||
comminfo->NBC_Dict[NBC_ALLTOALL] = hb_tree_new((dict_cmp_func)NBC_Alltoall_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete);
|
||||
if(comminfo->NBC_Dict[NBC_ALLTOALL] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; }
|
||||
NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_ALLTOALL]);
|
||||
comminfo->NBC_Dict_size[NBC_ALLTOALL] = 0;
|
||||
/* initialize the NBC_ALLGATHER SchedCache tree */
|
||||
comminfo->NBC_Dict[NBC_ALLGATHER] = hb_tree_new((dict_cmp_func)NBC_Allgather_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete);
|
||||
if(comminfo->NBC_Dict[NBC_ALLGATHER] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; }
|
||||
NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_ALLGATHER]);
|
||||
comminfo->NBC_Dict_size[NBC_ALLGATHER] = 0;
|
||||
/* initialize the NBC_ALLREDUCE SchedCache tree */
|
||||
comminfo->NBC_Dict[NBC_ALLREDUCE] = hb_tree_new((dict_cmp_func)NBC_Allreduce_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete);
|
||||
if(comminfo->NBC_Dict[NBC_ALLREDUCE] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; }
|
||||
NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_ALLREDUCE]);
|
||||
comminfo->NBC_Dict_size[NBC_ALLREDUCE] = 0;
|
||||
/* initialize the NBC_BARRIER SchedCache tree - is not needed -
|
||||
* schedule is hung off directly */
|
||||
comminfo->NBC_Dict_size[NBC_BARRIER] = 0;
|
||||
/* initialize the NBC_BCAST SchedCache tree */
|
||||
comminfo->NBC_Dict[NBC_BCAST] = hb_tree_new((dict_cmp_func)NBC_Bcast_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete);
|
||||
if(comminfo->NBC_Dict[NBC_BCAST] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; }
|
||||
NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_BCAST]);
|
||||
comminfo->NBC_Dict_size[NBC_BCAST] = 0;
|
||||
/* initialize the NBC_GATHER SchedCache tree */
|
||||
comminfo->NBC_Dict[NBC_GATHER] = hb_tree_new((dict_cmp_func)NBC_Gather_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete);
|
||||
if(comminfo->NBC_Dict[NBC_GATHER] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; }
|
||||
NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_GATHER]);
|
||||
comminfo->NBC_Dict_size[NBC_GATHER] = 0;
|
||||
/* initialize the NBC_REDUCE SchedCache tree */
|
||||
comminfo->NBC_Dict[NBC_REDUCE] = hb_tree_new((dict_cmp_func)NBC_Reduce_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete);
|
||||
if(comminfo->NBC_Dict[NBC_REDUCE] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; }
|
||||
NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_REDUCE]);
|
||||
comminfo->NBC_Dict_size[NBC_REDUCE] = 0;
|
||||
/* initialize the NBC_SCAN SchedCache tree */
|
||||
comminfo->NBC_Dict[NBC_SCAN] = hb_tree_new((dict_cmp_func)NBC_Scan_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete);
|
||||
if(comminfo->NBC_Dict[NBC_SCAN] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; }
|
||||
NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_SCAN]);
|
||||
comminfo->NBC_Dict_size[NBC_SCAN] = 0;
|
||||
/* initialize the NBC_SCATTER SchedCache tree */
|
||||
comminfo->NBC_Dict[NBC_SCATTER] = hb_tree_new((dict_cmp_func)NBC_Scatter_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete);
|
||||
if(comminfo->NBC_Dict[NBC_SCATTER] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; }
|
||||
NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_SCATTER]);
|
||||
comminfo->NBC_Dict_size[NBC_SCATTER] = 0;
|
||||
#endif
|
||||
|
||||
#ifndef OMPI_COMPONENT
|
||||
/* put the new attribute to the comm */
|
||||
res = MPI_Attr_put(comm, gkeyval, comminfo);
|
||||
if((MPI_SUCCESS != res)) { printf("Error in MPI_Attr_put() (%i)\n", res); return NULL; }
|
||||
#endif
|
||||
|
||||
return comminfo;
|
||||
}
|
||||
|
||||
__inline__ int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule) {
|
||||
int res;
|
||||
|
||||
handle->schedule = schedule;
|
||||
/* kick off first round */
|
||||
res = NBC_Start_round(handle);
|
||||
if((NBC_OK != res)) { printf("Error in NBC_Start_round() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
int NBC_Wait_poll(NBC_Handle *handle) {
|
||||
int res;
|
||||
|
||||
while(NBC_OK != NBC_Progress(handle));
|
||||
|
||||
res = NBC_Free(handle);
|
||||
if((NBC_OK != res)) { printf("Error in NBC_Free() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
int NBC_Wait(NBC_Handle *handle) {
|
||||
int res;
|
||||
|
||||
NBC_Progress_block(handle);
|
||||
|
||||
res = NBC_Free(handle);
|
||||
if((NBC_OK != res)) { printf("Error in NBC_Free() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
void NBC_SchedCache_args_delete_key_dummy(void *k) {
|
||||
/* do nothing because the key and the data element are identical :-)
|
||||
* both (the single one :) is freed in NBC_<COLLOP>_args_delete() */
|
||||
}
|
||||
|
||||
|
||||
void NBC_SchedCache_args_delete(void *entry) {
|
||||
struct NBC_dummyarg *tmp;
|
||||
|
||||
tmp = (struct NBC_dummyarg*)entry;
|
||||
/* free taglistentry */
|
||||
free(*(tmp->schedule));
|
||||
/* the schedule pointer itself is also malloc'd */
|
||||
free(tmp->schedule);
|
||||
free(tmp);
|
||||
}
|
||||
#endif
|
657
ompi/mca/coll/libnbc/nbc.h
Обычный файл
657
ompi/mca/coll/libnbc/nbc.h
Обычный файл
@ -0,0 +1,657 @@
|
||||
#ifndef __NBC_H__
|
||||
#define __NBC_H__
|
||||
|
||||
/*********************** LibNBC tuning parameters ************************/
|
||||
|
||||
/* the debug level */
|
||||
#define DLEVEL 0
|
||||
|
||||
/* use MPI, InfiniBand (define IB), or Open MPI (OMPI_COMPONENT) backend */
|
||||
#define OMPI_COMPONENT
|
||||
|
||||
/* enable schedule caching - undef NBC_CACHE_SCHEDULE to deactivate it */
|
||||
#define NBC_CACHE_SCHEDULE
|
||||
#define SCHED_DICT_UPPER 1024 /* max. number of dict entries */
|
||||
#define SCHED_DICT_LOWER 512 /* nuber of dict entries after wipe, if SCHED_DICT_UPPER is reached */
|
||||
|
||||
/********************* end of LibNBC tuning parameters ************************/
|
||||
|
||||
#ifndef OMPI_COMPONENT
|
||||
/* include mpi.h directly in case of OMPI */
|
||||
#include <mpi.h>
|
||||
#else
|
||||
#include "ompi_config.h"
|
||||
#include "mpi.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/datatype/datatype.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "dict.h"
|
||||
#ifdef IB
|
||||
#include "ib.h"
|
||||
#endif
|
||||
|
||||
/* if we use MPI-1, MPI_IN_PLACE is not defined :-( */
|
||||
#ifndef MPI_IN_PLACE
|
||||
#define MPI_IN_PLACE (void*)1
|
||||
#endif
|
||||
|
||||
/* restore inline behavior for non-gcc compilers */
|
||||
#ifndef __GNUC__
|
||||
#define __inline__ inline
|
||||
#endif
|
||||
|
||||
/* log(2) */
|
||||
#define LOG2 0.69314718055994530941
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Function return codes */
|
||||
#define NBC_OK 0 /* everything went fine */
|
||||
#define NBC_SUCCESS 0 /* everything went fine (MPI compliant :) */
|
||||
#define NBC_OOR 1 /* out of resources */
|
||||
#define NBC_BAD_SCHED 2 /* bad schedule */
|
||||
#define NBC_CONTINUE 3 /* progress not done */
|
||||
#define NBC_DATATYPE_NOT_SUPPORTED 4 /* datatype not supported or not valid */
|
||||
#define NBC_OP_NOT_SUPPORTED 5 /* operation not supported or not valid */
|
||||
#define NBC_NOT_IMPLEMENTED 6
|
||||
#define NBC_INVALID_PARAM 7 /* invalid parameters */
|
||||
|
||||
/* true/false */
|
||||
#define true 1
|
||||
#define false 0
|
||||
|
||||
/* all collectives */
|
||||
#define NBC_ALLGATHER 0
|
||||
#define NBC_ALLGATHERV 1
|
||||
#define NBC_ALLREDUCE 2
|
||||
#define NBC_ALLTOALL 3
|
||||
#define NBC_ALLTOALLV 4
|
||||
#define NBC_ALLTOALLW 5
|
||||
#define NBC_BARRIER 6
|
||||
#define NBC_BCAST 7
|
||||
#define NBC_EXSCAN 8
|
||||
#define NBC_GATHER 9
|
||||
#define NBC_GATHERV 10
|
||||
#define NBC_REDUCE 11
|
||||
#define NBC_REDUCESCAT 12
|
||||
#define NBC_SCAN 13
|
||||
#define NBC_SCATTER 14
|
||||
#define NBC_SCATTERV 15
|
||||
#define NBC_NUM_COLL 16
|
||||
|
||||
/* dirty trick to avoid fn call :) */
|
||||
#define NBC_Test NBC_Progress
|
||||
|
||||
/* several typedefs for NBC */
|
||||
|
||||
/* a schedule is basically a pointer to some memory location where the
|
||||
* schedule array resides */
|
||||
typedef void* NBC_Schedule;
|
||||
|
||||
/* the function type enum */
|
||||
typedef enum {
|
||||
SEND,
|
||||
RECV,
|
||||
OP,
|
||||
COPY,
|
||||
UNPACK
|
||||
} NBC_Fn_type;
|
||||
|
||||
/* the send argument struct */
|
||||
typedef struct {
|
||||
void *buf;
|
||||
char tmpbuf;
|
||||
int count;
|
||||
MPI_Datatype datatype;
|
||||
int dest;
|
||||
} NBC_Args_send;
|
||||
|
||||
/* the receive argument struct */
|
||||
typedef struct {
|
||||
void *buf;
|
||||
char tmpbuf;
|
||||
int count;
|
||||
MPI_Datatype datatype;
|
||||
int source;
|
||||
} NBC_Args_recv;
|
||||
|
||||
/* the operation argument struct */
|
||||
typedef struct {
|
||||
void *buf1;
|
||||
char tmpbuf1;
|
||||
void *buf2;
|
||||
char tmpbuf2;
|
||||
void *buf3;
|
||||
char tmpbuf3;
|
||||
int count;
|
||||
MPI_Op op;
|
||||
MPI_Datatype datatype;
|
||||
} NBC_Args_op;
|
||||
|
||||
/* the copy argument struct */
|
||||
typedef struct {
|
||||
void *src;
|
||||
char tmpsrc;
|
||||
int srccount;
|
||||
MPI_Datatype srctype;
|
||||
void *tgt;
|
||||
char tmptgt;
|
||||
int tgtcount;
|
||||
MPI_Datatype tgttype;
|
||||
} NBC_Args_copy;
|
||||
|
||||
/* unpack operation arguments */
|
||||
typedef struct {
|
||||
void *inbuf;
|
||||
char tmpinbuf;
|
||||
int count;
|
||||
MPI_Datatype datatype;
|
||||
void *outbuf;
|
||||
char tmpoutbuf;
|
||||
} NBC_Args_unpack;
|
||||
|
||||
/* used to hang off a communicator */
|
||||
typedef struct {
|
||||
MPI_Comm mycomm; /* save the shadow communicator here */
|
||||
int tag;
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
hb_tree *NBC_Dict[NBC_NUM_COLL]; /* this should be an array */
|
||||
int NBC_Dict_size[NBC_NUM_COLL];
|
||||
#endif
|
||||
} NBC_Comminfo;
|
||||
|
||||
/* thread specific data */
|
||||
typedef struct {
|
||||
MPI_Comm comm;
|
||||
MPI_Comm mycomm;
|
||||
long row_offset;
|
||||
int tag;
|
||||
int req_count;
|
||||
#ifdef OMPI_COMPONENT
|
||||
/*ompi_request_t **req_array;*/
|
||||
MPI_Request *req_array;
|
||||
#endif
|
||||
#ifdef MPI
|
||||
MPI_Request *req_array;
|
||||
#endif
|
||||
#ifdef IB
|
||||
IB_Request *req_array;
|
||||
#endif
|
||||
NBC_Comminfo *comminfo;
|
||||
NBC_Schedule *schedule;
|
||||
void *tmpbuf; /* temporary buffer e.g. used for Reduce */
|
||||
/* we should make a handle pointer to a state later */
|
||||
} NBC_Handle;
|
||||
|
||||
/* internal function prototypes */
|
||||
int NBC_Sched_create(NBC_Schedule* schedule);
|
||||
int NBC_Sched_send(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule);
|
||||
int NBC_Sched_recv(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule);
|
||||
int NBC_Sched_op(void* buf3, char tmpbuf3, void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule);
|
||||
int NBC_Sched_copy(void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, MPI_Datatype tgttype, NBC_Schedule *schedule);
|
||||
int NBC_Sched_unpack(void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, NBC_Schedule *schedule);
|
||||
int NBC_Sched_barrier(NBC_Schedule *schedule);
|
||||
int NBC_Sched_commit(NBC_Schedule *schedule);
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* this is a dummy structure which is used to get the schedule out of
|
||||
* the collop sepcific structure. The schedule pointer HAS to be at the
|
||||
* first position and should NOT BE REORDERED by the compiler (C
|
||||
* guarantees that */
|
||||
struct NBC_dummyarg {
|
||||
NBC_Schedule *schedule;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
NBC_Schedule *schedule;
|
||||
void *sendbuf;
|
||||
int sendcount;
|
||||
MPI_Datatype sendtype;
|
||||
void* recvbuf;
|
||||
int recvcount;
|
||||
MPI_Datatype recvtype;
|
||||
} NBC_Alltoall_args;
|
||||
int NBC_Alltoall_args_compare(NBC_Alltoall_args *a, NBC_Alltoall_args *b, void *param);
|
||||
|
||||
typedef struct {
|
||||
NBC_Schedule *schedule;
|
||||
void *sendbuf;
|
||||
int sendcount;
|
||||
MPI_Datatype sendtype;
|
||||
void* recvbuf;
|
||||
int recvcount;
|
||||
MPI_Datatype recvtype;
|
||||
} NBC_Allgather_args;
|
||||
int NBC_Allgather_args_compare(NBC_Allgather_args *a, NBC_Allgather_args *b, void *param);
|
||||
|
||||
typedef struct {
|
||||
NBC_Schedule *schedule;
|
||||
void *sendbuf;
|
||||
void* recvbuf;
|
||||
int count;
|
||||
MPI_Datatype datatype;
|
||||
MPI_Op op;
|
||||
} NBC_Allreduce_args;
|
||||
int NBC_Allreduce_args_compare(NBC_Allreduce_args *a, NBC_Allreduce_args *b, void *param);
|
||||
|
||||
typedef struct {
|
||||
NBC_Schedule *schedule;
|
||||
void *buffer;
|
||||
int count;
|
||||
MPI_Datatype datatype;
|
||||
int root;
|
||||
} NBC_Bcast_args;
|
||||
int NBC_Bcast_args_compare(NBC_Bcast_args *a, NBC_Bcast_args *b, void *param);
|
||||
|
||||
typedef struct {
|
||||
NBC_Schedule *schedule;
|
||||
void *sendbuf;
|
||||
int sendcount;
|
||||
MPI_Datatype sendtype;
|
||||
void* recvbuf;
|
||||
int recvcount;
|
||||
MPI_Datatype recvtype;
|
||||
int root;
|
||||
} NBC_Gather_args;
|
||||
int NBC_Gather_args_compare(NBC_Gather_args *a, NBC_Gather_args *b, void *param);
|
||||
|
||||
typedef struct {
|
||||
NBC_Schedule *schedule;
|
||||
void *sendbuf;
|
||||
void* recvbuf;
|
||||
int count;
|
||||
MPI_Datatype datatype;
|
||||
MPI_Op op;
|
||||
int root;
|
||||
} NBC_Reduce_args;
|
||||
int NBC_Reduce_args_compare(NBC_Reduce_args *a, NBC_Reduce_args *b, void *param);
|
||||
|
||||
typedef struct {
|
||||
NBC_Schedule *schedule;
|
||||
void *sendbuf;
|
||||
void* recvbuf;
|
||||
int count;
|
||||
MPI_Datatype datatype;
|
||||
MPI_Op op;
|
||||
} NBC_Scan_args;
|
||||
int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param);
|
||||
|
||||
typedef struct {
|
||||
NBC_Schedule *schedule;
|
||||
void *sendbuf;
|
||||
int sendcount;
|
||||
MPI_Datatype sendtype;
|
||||
void* recvbuf;
|
||||
int recvcount;
|
||||
MPI_Datatype recvtype;
|
||||
int root;
|
||||
} NBC_Scatter_args;
|
||||
int NBC_Scatter_args_compare(NBC_Scatter_args *a, NBC_Scatter_args *b, void *param);
|
||||
|
||||
|
||||
/* Schedule cache structures/functions */
|
||||
u_int32_t adler32(u_int32_t adler, int8_t *buf, int len);
|
||||
void NBC_SchedCache_args_delete(void *entry);
|
||||
void NBC_SchedCache_args_delete_key_dummy(void *k);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
int NBC_Free(NBC_Handle *handle);
|
||||
int NBC_Progress(NBC_Handle *handle);
|
||||
int NBC_Progress_block(NBC_Handle *handle);
|
||||
__inline__ int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule);
|
||||
__inline__ int NBC_Init_handle(NBC_Handle *handle, MPI_Comm comm);
|
||||
int NBC_Operation(void *buf3, void *buf1, void *buf2, MPI_Op op, MPI_Datatype type, int count);
|
||||
static __inline__ int NBC_Type_intrinsic(MPI_Datatype type);
|
||||
static __inline__ int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm);
|
||||
__inline__ NBC_Comminfo* NBC_Init_comm(MPI_Comm comm);
|
||||
|
||||
/* external function prototypes */
|
||||
int NBC_Ibarrier(MPI_Comm comm, NBC_Handle* handle);
|
||||
int NBC_Ibcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, NBC_Handle* handle);
|
||||
int NBC_Ibcast_lin(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, NBC_Handle* handle);
|
||||
int NBC_Ialltoallv(void* sendbuf, int *sendcounts, int *sdispls, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *rdispls, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle* handle);
|
||||
int NBC_Igather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle);
|
||||
int NBC_Igatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle);
|
||||
int NBC_Iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle);
|
||||
int NBC_Iscatterv(void* sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle);
|
||||
int NBC_Iallgather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle);
|
||||
int NBC_Iallgatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle);
|
||||
int NBC_Ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle);
|
||||
int NBC_Ireduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, NBC_Handle* handle);
|
||||
int NBC_Iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, NBC_Handle* handle);
|
||||
int NBC_Iscan(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, NBC_Handle* handle);
|
||||
int NBC_Ireduce_scatter(void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, NBC_Handle* handle);
|
||||
int NBC_Wait(NBC_Handle *handle);
|
||||
int NBC_Wait_poll(NBC_Handle *handle);
|
||||
|
||||
/* some macros */
|
||||
|
||||
/* a schedule has the following format:
|
||||
* [schedule] ::= [size][round-schedule][delimiter][round-schedule][delimiter]...[end]
|
||||
* [size] ::= size of the schedule (int)
|
||||
* [round-schedule] ::= [num][type][type-args][type][type-args]...
|
||||
* [num] ::= number of elements in round (int)
|
||||
* [type] ::= function type (NBC_Fn_type)
|
||||
* [type-args] ::= type specific arguments (NBC_Args_send, NBC_Args_recv or, NBC_Args_op)
|
||||
* [delimiter] ::= 1 (char) - indicates that a round follows
|
||||
* [end] ::= 0 (char) - indicates that this is the last round
|
||||
*/
|
||||
|
||||
/* NBC_GET_ROUND_SIZE returns the size in bytes of a round of a NBC_Schedule
|
||||
* schedule. A round has the format:
|
||||
* [num]{[type][type-args]}
|
||||
* e.g. [(int)2][(NBC_Fn_type)SEND][(NBC_Args_send)SEND-ARGS][(NBC_Fn_type)RECV][(NBC_Args_recv)RECV-ARGS] */
|
||||
#define NBC_GET_ROUND_SIZE(schedule, size) \
|
||||
{ \
|
||||
int *numptr; \
|
||||
NBC_Fn_type *typeptr; \
|
||||
int i; \
|
||||
\
|
||||
numptr = (int*)schedule; \
|
||||
/*NBC_DEBUG(10, "GET_ROUND_SIZE got %i elements\n", *numptr); */\
|
||||
/* end is increased by sizeof(int) bytes to point to type */ \
|
||||
typeptr = (NBC_Fn_type*)((int*)(schedule)+1); \
|
||||
for (i=0; i<*numptr; i++) { \
|
||||
/* go sizeof op-data forward */ \
|
||||
switch(*typeptr) { \
|
||||
case SEND: \
|
||||
/*printf("found a SEND at offset %i\n", (int)typeptr-(int)schedule); */\
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_send*)typeptr+1); \
|
||||
break; \
|
||||
case RECV: \
|
||||
/*printf("found a RECV at offset %i\n", (int)typeptr-(int)schedule); */\
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_recv*)typeptr+1); \
|
||||
break; \
|
||||
case OP: \
|
||||
/*printf("found a OP at offset %i\n", (int)typeptr-(int)schedule); */\
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_op*)typeptr+1); \
|
||||
break; \
|
||||
case COPY: \
|
||||
/*printf("found a COPY at offset %i\n", (int)typeptr-(int)schedule); */\
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_copy*)typeptr+1); \
|
||||
break; \
|
||||
case UNPACK: \
|
||||
/*printf("found a UNPACK at offset %i\n", (int)typeptr-(int)schedule); */\
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_unpack*)typeptr+1); \
|
||||
break; \
|
||||
default: \
|
||||
printf("NBC_GET_ROUND_SIZE: bad type %li at offset %li\n", (long)*typeptr, (long)typeptr-(long)schedule); \
|
||||
return NBC_BAD_SCHED; \
|
||||
} \
|
||||
/* increase ptr by size of fn_type enum */ \
|
||||
typeptr = (NBC_Fn_type*)((NBC_Fn_type*)typeptr+1); \
|
||||
} \
|
||||
/* this could be optimized if typeptr would be used directly */ \
|
||||
size = (long)typeptr-(long)schedule; \
|
||||
}
|
||||
|
||||
/* returns the size of a schedule in bytes */
|
||||
#define NBC_GET_SIZE(schedule, size) \
|
||||
{ \
|
||||
size=*(int*)schedule; \
|
||||
}
|
||||
|
||||
/* increase the size of a schedule by size bytes */
|
||||
#define NBC_INC_SIZE(schedule, size) \
|
||||
{ \
|
||||
*(int*)schedule+=size; \
|
||||
}
|
||||
|
||||
/* increments the number of operations in the last round */
|
||||
#define NBC_INC_NUM_ROUND(schedule) \
|
||||
{ \
|
||||
int total_size; \
|
||||
long round_size; \
|
||||
char *ptr, *lastround; \
|
||||
\
|
||||
NBC_GET_SIZE(schedule, total_size); \
|
||||
\
|
||||
/* ptr begins at first round (first int is overall size) */ \
|
||||
ptr = (char*)((char*)schedule+sizeof(int)); \
|
||||
lastround = ptr; \
|
||||
while ((long)ptr-(long)schedule < total_size) { \
|
||||
NBC_GET_ROUND_SIZE(ptr, round_size); \
|
||||
/*printf("got round size %i\n", round_size);*/ \
|
||||
lastround = ptr; \
|
||||
/* add round size */ \
|
||||
ptr=ptr+round_size; \
|
||||
/* add sizeof(char) as barrier delimiter */ \
|
||||
ptr=ptr+sizeof(char); \
|
||||
/*printf("(int)ptr-(int)schedule=%i, size=%i\n", (int)ptr-(int)schedule, size); */\
|
||||
} \
|
||||
/*printf("lastround count is at offset: %i\n", (int)lastround-(int)schedule);*/ \
|
||||
/* this is the count in the last round of the schedule */ \
|
||||
(*(int*)lastround)++; \
|
||||
}
|
||||
|
||||
/* NBC_PRINT_ROUND prints a round in a schedule. A round has the format:
|
||||
* [num]{[op][op-data]} types: [int]{[enum][op-type]}
|
||||
* e.g. [2][SEND][SEND-ARGS][RECV][RECV-ARGS] */
|
||||
#define NBC_PRINT_ROUND(schedule) \
|
||||
{ \
|
||||
int myrank, *numptr; \
|
||||
NBC_Fn_type *typeptr; \
|
||||
NBC_Args_send *sendargs; \
|
||||
NBC_Args_recv *recvargs; \
|
||||
NBC_Args_op *opargs; \
|
||||
NBC_Args_copy *copyargs; \
|
||||
NBC_Args_unpack *unpackargs; \
|
||||
int i; \
|
||||
\
|
||||
numptr = (int*)schedule; \
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); \
|
||||
printf("has %i actions: \n", *numptr); \
|
||||
/* end is increased by sizeof(int) bytes to point to type */ \
|
||||
typeptr = (NBC_Fn_type*)((int*)(schedule)+1); \
|
||||
for (i=0; i<*numptr; i++) { \
|
||||
/* go sizeof op-data forward */ \
|
||||
switch(*typeptr) { \
|
||||
case SEND: \
|
||||
printf("[%i] SEND (offset %li) ", myrank, (long)typeptr-(long)schedule); \
|
||||
sendargs = (NBC_Args_send*)(typeptr+1); \
|
||||
printf("*buf: %lu, count: %i, type: %lu, dest: %i)\n", (unsigned long)sendargs->buf, sendargs->count, (unsigned long)sendargs->datatype, sendargs->dest); \
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_send*)typeptr+1); \
|
||||
break; \
|
||||
case RECV: \
|
||||
printf("[%i] RECV (offset %li) ", myrank, (long)typeptr-(long)schedule); \
|
||||
recvargs = (NBC_Args_recv*)(typeptr+1); \
|
||||
printf("*buf: %lu, count: %i, type: %lu, source: %i)\n", (unsigned long)recvargs->buf, recvargs->count, (unsigned long)recvargs->datatype, recvargs->source); \
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_recv*)typeptr+1); \
|
||||
break; \
|
||||
case OP: \
|
||||
printf("[%i] OP (offset %li) ", myrank, (long)typeptr-(long)schedule); \
|
||||
opargs = (NBC_Args_op*)(typeptr+1); \
|
||||
printf("*buf1: %lu, buf2: %lu, count: %i, type: %lu)\n", (unsigned long)opargs->buf1, (unsigned long)opargs->buf2, opargs->count, (unsigned long)opargs->datatype); \
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_op*)typeptr+1); \
|
||||
break; \
|
||||
case COPY: \
|
||||
printf("[%i] COPY (offset %li) ", myrank, (long)typeptr-(long)schedule); \
|
||||
copyargs = (NBC_Args_copy*)(typeptr+1); \
|
||||
printf("*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu, tgtcount: %i, tgttype: %lu)\n", (unsigned long)copyargs->src, copyargs->srccount, (unsigned long)copyargs->srctype, (unsigned long)copyargs->tgt, copyargs->tgtcount, (unsigned long)copyargs->tgttype); \
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_copy*)typeptr+1); \
|
||||
break; \
|
||||
case UNPACK: \
|
||||
printf("[%i] UNPACK (offset %li) ", myrank, (long)typeptr-(long)schedule); \
|
||||
unpackargs = (NBC_Args_unpack*)(typeptr+1); \
|
||||
printf("*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu\n",(unsigned long)unpackargs->inbuf, unpackargs->count, (unsigned long)unpackargs->datatype, (unsigned long)unpackargs->outbuf); \
|
||||
typeptr = (NBC_Fn_type*)((NBC_Args_unpack*)typeptr+1); \
|
||||
break; \
|
||||
default: \
|
||||
printf("[%i] NBC_PRINT_ROUND: bad type %li at offset %li\n", myrank, (long)*typeptr, (long)typeptr-(long)schedule); \
|
||||
return NBC_BAD_SCHED; \
|
||||
} \
|
||||
/* increase ptr by size of fn_type enum */ \
|
||||
typeptr = (NBC_Fn_type*)((NBC_Fn_type*)typeptr+1); \
|
||||
} \
|
||||
printf("\n"); \
|
||||
}
|
||||
|
||||
#define NBC_PRINT_SCHED(schedule) \
|
||||
{ \
|
||||
int size, myrank; \
|
||||
long round_size; \
|
||||
char *ptr; \
|
||||
\
|
||||
NBC_GET_SIZE(schedule, size); \
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); \
|
||||
printf("[%i] printing schedule of size %i\n", myrank, size); \
|
||||
\
|
||||
/* ptr begins at first round (first int is overall size) */ \
|
||||
ptr = (char*)((char*)schedule+sizeof(int)); \
|
||||
while ((long)ptr-(long)schedule < size) { \
|
||||
NBC_GET_ROUND_SIZE(ptr, round_size); \
|
||||
printf("[%i] Round at byte %li (size %li) ", myrank, (long)ptr-(long)schedule, round_size); \
|
||||
NBC_PRINT_ROUND(ptr); \
|
||||
/* add round size */ \
|
||||
ptr=ptr+round_size; \
|
||||
/* add sizeof(char) as barrier delimiter */ \
|
||||
ptr=ptr+sizeof(char); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHECK_NULL(ptr) \
|
||||
{ \
|
||||
if(ptr == NULL) { \
|
||||
printf("realloc error :-(\n"); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
#define NBC_DEBUG(level, ...) {}
|
||||
*/
|
||||
|
||||
static __inline__ void NBC_DEBUG(int level, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
int rank;
|
||||
|
||||
if(DLEVEL > level) {
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
|
||||
printf("[%i] ", rank);
|
||||
va_start(ap, fmt);
|
||||
vprintf(fmt, ap);
|
||||
va_end (ap);
|
||||
}
|
||||
}
|
||||
|
||||
/* returns true (1) or false (0) if type is intrinsic or not */
|
||||
static __inline__ int NBC_Type_intrinsic(MPI_Datatype type) {
|
||||
|
||||
if( ( type == MPI_INT ) ||
|
||||
( type == MPI_LONG ) ||
|
||||
( type == MPI_SHORT ) ||
|
||||
( type == MPI_UNSIGNED ) ||
|
||||
( type == MPI_UNSIGNED_SHORT ) ||
|
||||
( type == MPI_UNSIGNED_LONG ) ||
|
||||
( type == MPI_FLOAT ) ||
|
||||
( type == MPI_DOUBLE ) ||
|
||||
( type == MPI_LONG_DOUBLE ) ||
|
||||
( type == MPI_BYTE ) ||
|
||||
( type == MPI_FLOAT_INT) ||
|
||||
( type == MPI_DOUBLE_INT) ||
|
||||
( type == MPI_LONG_INT) ||
|
||||
( type == MPI_2INT) ||
|
||||
( type == MPI_SHORT_INT) ||
|
||||
( type == MPI_LONG_DOUBLE_INT))
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* let's give a try to inline functions */
|
||||
static __inline__ int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) {
|
||||
int size, pos, res;
|
||||
MPI_Aint ext;
|
||||
void *packbuf;
|
||||
|
||||
if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) {
|
||||
/* if we have the same types and they are contiguous (intrinsic
|
||||
* types are contiguous), we can just use a single memcpy */
|
||||
res = MPI_Type_extent(srctype, &ext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
memcpy(tgt, src, srccount*ext);
|
||||
} else {
|
||||
/* we have to pack and unpack */
|
||||
res = MPI_Pack_size(srccount, srctype, comm, &size);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; }
|
||||
packbuf = malloc(size);
|
||||
if (NULL == packbuf) { printf("Error in malloc()\n"); return res; }
|
||||
pos=0;
|
||||
res = MPI_Pack(src, srccount, srctype, packbuf, size, &pos, comm);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; }
|
||||
pos=0;
|
||||
res = MPI_Unpack(packbuf, size, &pos, tgt, tgtcount, tgttype, comm);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Unpack() (%i)\n", res); return res; }
|
||||
free(packbuf);
|
||||
}
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
static __inline__ int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void *tgt, MPI_Comm comm) {
|
||||
int size, pos, res;
|
||||
MPI_Aint ext;
|
||||
|
||||
if(NBC_Type_intrinsic(srctype)) {
|
||||
/* if we have the same types and they are contiguous (intrinsic
|
||||
* types are contiguous), we can just use a single memcpy */
|
||||
res = MPI_Type_extent(srctype, &ext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
memcpy(tgt, src, srccount*ext);
|
||||
|
||||
} else {
|
||||
/* we have to unpack */
|
||||
res = MPI_Pack_size(srccount, srctype, comm, &size);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; }
|
||||
pos=0;
|
||||
res = MPI_Unpack(src, size, &pos, tgt, srccount, srctype, comm);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Unpack() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* deletes elements from dict until low watermark is reached */
|
||||
static __inline__ void NBC_SchedCache_dictwipe(hb_tree *dict, int *size) {
|
||||
hb_itor *itor;
|
||||
|
||||
itor = hb_itor_new(dict);
|
||||
for (; hb_itor_valid(itor) && (*size>SCHED_DICT_LOWER); hb_itor_next(itor)) {
|
||||
hb_tree_remove(dict, hb_itor_key(itor), 0);
|
||||
*size = *size-1;
|
||||
}
|
||||
hb_itor_destroy(itor);
|
||||
}
|
||||
|
||||
#define NBC_IN_PLACE(sendbuf, recvbuf, inplace) \
|
||||
{ \
|
||||
inplace = 0; \
|
||||
if(sendbuf == MPI_IN_PLACE) { \
|
||||
sendbuf = recvbuf; \
|
||||
inplace = 1; \
|
||||
} \
|
||||
if(recvbuf == MPI_IN_PLACE) { \
|
||||
recvbuf = sendbuf; \
|
||||
inplace = 1; \
|
||||
} \
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
202
ompi/mca/coll/libnbc/nbc_iallgather.c
Обычный файл
202
ompi/mca/coll/libnbc/nbc_iallgather.c
Обычный файл
@ -0,0 +1,202 @@
|
||||
#include "nbc.h"
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* tree comparison function for schedule cache */
|
||||
int NBC_Allgather_args_compare(NBC_Allgather_args *a, NBC_Allgather_args *b, void *param) {
|
||||
|
||||
if( (a->sendbuf == b->sendbuf) &&
|
||||
(a->sendcount == b->sendcount) &&
|
||||
(a->sendtype == b->sendtype) &&
|
||||
(a->recvbuf == b->recvbuf) &&
|
||||
(a->recvcount == b->recvcount) &&
|
||||
(a->recvtype == b->recvtype) ) {
|
||||
return 0;
|
||||
}
|
||||
if( a->sendbuf < b->sendbuf ) {
|
||||
return -1;
|
||||
}
|
||||
return +1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* simple linear MPI_Iallgather
|
||||
* the algorithm uses p-1 rounds
|
||||
* each node sends the packet it received last round (or has in round 0) to it's right neighbor (modulo p)
|
||||
* each node receives from it's left (modulo p) neighbor */
|
||||
int NBC_Iallgather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle) {
|
||||
int rank, p, res, r;
|
||||
MPI_Aint rcvext, sndext;
|
||||
NBC_Schedule *schedule;
|
||||
char *rbuf, *sbuf, inplace;
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
NBC_Allgather_args *args, *found, search;
|
||||
#endif
|
||||
|
||||
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(sendtype, &sndext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(recvtype, &rcvext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
|
||||
handle->tmpbuf = NULL;
|
||||
|
||||
if(!((rank == 0) && inplace)) {
|
||||
/* copy my data to receive buffer */
|
||||
rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext);
|
||||
res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* search schedule in communicator specific tree */
|
||||
search.sendbuf=sendbuf;
|
||||
search.sendcount=sendcount;
|
||||
search.sendtype=sendtype;
|
||||
search.recvbuf=recvbuf;
|
||||
search.recvcount=recvcount;
|
||||
search.recvtype=recvtype;
|
||||
found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_ALLGATHER], &search);
|
||||
if(found == NULL) {
|
||||
#endif
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc()\n"); return res; }
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(NBC_OK != res) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; }
|
||||
|
||||
sbuf = ((char *)recvbuf) + (rank*recvcount*rcvext);
|
||||
/* do p-1 rounds */
|
||||
for(r=0;r<p;r++) {
|
||||
if(r != rank) {
|
||||
/* recv from rank r */
|
||||
rbuf = ((char *)recvbuf) + r*(recvcount*rcvext);
|
||||
res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, r, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
/* send to rank r - not from the sendbuf to optimize MPI_IN_PLACE */
|
||||
res = NBC_Sched_send(sbuf, false, recvcount, recvtype, r, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* save schedule to tree */
|
||||
args = malloc(sizeof(NBC_Allgather_args));
|
||||
args->sendbuf=sendbuf;
|
||||
args->sendcount=sendcount;
|
||||
args->sendtype=sendtype;
|
||||
args->recvbuf=recvbuf;
|
||||
args->recvcount=recvcount;
|
||||
args->recvtype=recvtype;
|
||||
args->schedule=schedule;
|
||||
res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_ALLGATHER], args, args, 0);
|
||||
if(res != 0) printf("error in dict_insert() (%i)\n", res);
|
||||
/* increase number of elements for A2A */
|
||||
if(++handle->comminfo->NBC_Dict_size[NBC_ALLGATHER] > SCHED_DICT_UPPER) {
|
||||
NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_ALLGATHER], &handle->comminfo->NBC_Dict_size[NBC_ALLGATHER]);
|
||||
}
|
||||
} else {
|
||||
/* found schedule */
|
||||
schedule=found->schedule;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*NBC_PRINT_SCHED(*schedule);*/
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
|
||||
/* this is a new possible dissemination based allgather algorithm - we should
|
||||
* try it some time (big comm, small data) */
|
||||
#if 0
|
||||
|
||||
static __inline__ void diss_unpack(int rank, int vrank, int round, int p, int *pos, void *tmpbuf, int datasize, int slotsize, void *recvbuf, int sendcount, MPI_Datatype sendtype, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Schedule *schedule) {
|
||||
int r, res;
|
||||
char *sbuf, *rbuf;
|
||||
|
||||
sbuf = (char *)tmpbuf + (*pos*datasize);
|
||||
rbuf = (char *)recvbuf + (vrank*slotsize);
|
||||
printf("[%i] unpacking tmpbuf pos: %i (%lu) to rbuf elem: %i (%lu) - %i elems, datasize %i\n", rank, *pos, (unsigned long)sbuf, vrank, (unsigned long)rbuf, recvcount, datasize);
|
||||
res = NBC_Sched_unpack(sbuf, recvcount, recvtype, rbuf, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Unpack() (%i)\n", res); }
|
||||
*pos=*pos+1;
|
||||
|
||||
for(r=0; r<=round; r++) {
|
||||
if(r != 0) {
|
||||
diss_unpack(rank, (vrank-(1<<(r-1))+p)%p, r-1, p, pos, tmpbuf, datasize, slotsize, recvbuf, sendcount, sendtype, recvcount, recvtype, comm, schedule);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static __inline__ int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle) {
|
||||
int res, r, maxround, size, speer, rpeer, pos, datasize;
|
||||
char *sbuf, *rbuf;
|
||||
|
||||
res = NBC_OK;
|
||||
if(p < 2) return res;
|
||||
|
||||
maxround = (int)ceil((log(p)/LOG2));
|
||||
|
||||
if(NBC_Type_intrinsic(sendtype)) {
|
||||
datasize = sndext*sendcount;
|
||||
} else {
|
||||
res = MPI_Pack_size(sendcount, sendtype, comm, &datasize);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
/* tmpbuf is probably bigger than p -> next power of 2 */
|
||||
handle->tmpbuf=malloc(datasize*(1<<maxround));
|
||||
|
||||
/* copy my send - data to temp send/recv buffer */
|
||||
sbuf = ((char *)sendbuf) + (rank*sendcount*sndext);
|
||||
/* pack send buffer */
|
||||
if(NBC_Type_intrinsic(sendtype)) {
|
||||
/* it is contiguous - we can just memcpy it */
|
||||
memcpy(handle->tmpbuf, sbuf, datasize);
|
||||
} else {
|
||||
pos = 0;
|
||||
res = MPI_Pack(sbuf, sendcount, sendtype, handle->tmpbuf, datasize, &pos, comm);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
printf("[%i] receive buffer is at %lu of size %i, maxround: %i\n", rank, (unsigned long)handle->tmpbuf, (int)sndext*sendcount*(1<<maxround), maxround);
|
||||
for(r = 0; r < maxround; r++) {
|
||||
size = datasize*(1<<r); /* size doubles every round */
|
||||
rbuf = (char*)handle->tmpbuf+size;
|
||||
sbuf = (char*)handle->tmpbuf;
|
||||
|
||||
speer = (rank + (1<<r)) % p;
|
||||
/* add p because modulo does not work with negative values */
|
||||
rpeer = ((rank - (1<<r))+p) % p;
|
||||
|
||||
printf("[%i] receiving %i bytes from host %i into rbuf %lu\n", rank, size, rpeer, (unsigned long)rbuf);
|
||||
res = NBC_Sched_recv(rbuf, size, MPI_BYTE, rpeer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
|
||||
printf("[%i] sending %i bytes to host %i from sbuf %lu\n", rank, size, speer, (unsigned long)sbuf);
|
||||
res = NBC_Sched_send(sbuf, size, MPI_BYTE, speer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
pos = 0;
|
||||
diss_unpack(rank, rank, r, p, &pos, handle->tmpbuf, datasize, recvcount*rcvext, recvbuf, sendcount, sendtype, recvcount, recvtype, comm, schedule);
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
#endif
|
70
ompi/mca/coll/libnbc/nbc_iallgatherv.c
Обычный файл
70
ompi/mca/coll/libnbc/nbc_iallgatherv.c
Обычный файл
@ -0,0 +1,70 @@
|
||||
#include "nbc.h"
|
||||
|
||||
/* an allgatherv schedule can not be cached easily because the contents
|
||||
* ot the recvcounts array may change, so a comparison of the address
|
||||
* would not be sufficient ... we simply do not cache it */
|
||||
|
||||
/* simple linear MPI_Iallgatherv
|
||||
* the algorithm uses p-1 rounds
|
||||
* first round:
|
||||
* each node sends to it's left node (rank+1)%p sendcount elements
|
||||
* each node begins with it's right node (rank-11)%p and receives from it recvcounts[(rank+1)%p] elements
|
||||
* second round:
|
||||
* each node sends to node (rank+2)%p sendcount elements
|
||||
* each node receives from node (rank-2)%p recvcounts[(rank+2)%p] elements */
|
||||
int NBC_Iallgatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle) {
|
||||
int rank, p, res, r, speer, rpeer;
|
||||
MPI_Aint rcvext, sndext;
|
||||
NBC_Schedule *schedule;
|
||||
char *rbuf, *sbuf, inplace;
|
||||
|
||||
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(sendtype, &sndext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(recvtype, &rcvext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; }
|
||||
|
||||
handle->tmpbuf=NULL;
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; }
|
||||
|
||||
if(!((rank == 0) && inplace)) {
|
||||
/* copy my data to receive buffer */
|
||||
rbuf = ((char *)recvbuf) + (displs[rank]*rcvext);
|
||||
NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcounts[rank], recvtype, comm);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
sbuf = ((char *)recvbuf) + (displs[rank]*rcvext);
|
||||
/* do p-1 rounds */
|
||||
for(r=1;r<p;r++) {
|
||||
speer = (rank+r)%p;
|
||||
rpeer = (rank-r+p)%p;
|
||||
rbuf = ((char *)recvbuf) + (displs[rpeer]*rcvext);
|
||||
|
||||
res = NBC_Sched_recv(rbuf, false, recvcounts[rpeer], recvtype, rpeer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
/* send to rank r - not from the sendbuf to optimize MPI_IN_PLACE */
|
||||
res = NBC_Sched_send(sbuf, false, recvcounts[0], recvtype, speer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
332
ompi/mca/coll/libnbc/nbc_iallreduce.c
Обычный файл
332
ompi/mca/coll/libnbc/nbc_iallreduce.c
Обычный файл
@ -0,0 +1,332 @@
|
||||
#include "nbc.h"
|
||||
static __inline__ int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle);
|
||||
static __inline__ int allred_sched_chain(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize);
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* tree comparison function for schedule cache */
|
||||
int NBC_Allreduce_args_compare(NBC_Allreduce_args *a, NBC_Allreduce_args *b, void *param) {
|
||||
|
||||
if( (a->sendbuf == b->sendbuf) &&
|
||||
(a->recvbuf == b->recvbuf) &&
|
||||
(a->count == b->count) &&
|
||||
(a->datatype == b->datatype) &&
|
||||
(a->op == b->op) ) {
|
||||
return 0;
|
||||
}
|
||||
if( a->sendbuf < b->sendbuf ) {
|
||||
return -1;
|
||||
}
|
||||
return +1;
|
||||
}
|
||||
#endif
|
||||
|
||||
int NBC_Iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, NBC_Handle* handle) {
|
||||
int rank, p, res, size, segsize;
|
||||
MPI_Aint ext;
|
||||
NBC_Schedule *schedule;
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
NBC_Allreduce_args *args, *found, search;
|
||||
#endif
|
||||
enum { NBC_ARED_BINOMIAL, NBC_ARED_CHAIN } alg;
|
||||
char inplace;
|
||||
|
||||
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(datatype, &ext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
res = MPI_Type_size(datatype, &size);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
|
||||
handle->tmpbuf = malloc(ext*count);
|
||||
if(handle->tmpbuf == NULL) { printf("Error in malloc() (%i)\n", res); return NBC_OOR; }
|
||||
|
||||
if((p == 1) && !inplace) {
|
||||
/* for a single node - copy data to receivebuf */
|
||||
res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
/* algorithm selection */
|
||||
if(p < 4 || size*count < 65536) {
|
||||
alg = NBC_ARED_BINOMIAL;
|
||||
} else if(size*count < 262144){
|
||||
alg = NBC_ARED_CHAIN;
|
||||
segsize = 16384/2;
|
||||
} else {
|
||||
alg = NBC_ARED_CHAIN;
|
||||
segsize = 65536/2;
|
||||
}
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* search schedule in communicator specific tree */
|
||||
search.sendbuf=sendbuf;
|
||||
search.recvbuf=recvbuf;
|
||||
search.count=count;
|
||||
search.datatype=datatype;
|
||||
search.op=op;
|
||||
found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_ALLREDUCE], &search);
|
||||
if(found == NULL) {
|
||||
#endif
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc()\n"); return res; }
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }
|
||||
|
||||
switch(alg) {
|
||||
case NBC_ARED_BINOMIAL:
|
||||
res = allred_sched_diss(rank, p, count, datatype, sendbuf, recvbuf, op, schedule, handle);
|
||||
break;
|
||||
case NBC_ARED_CHAIN:
|
||||
res = allred_sched_chain(rank, p, count, datatype, sendbuf, recvbuf, op, size, ext, schedule, handle, segsize);
|
||||
break;
|
||||
}
|
||||
if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; }
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* save schedule to tree */
|
||||
args = malloc(sizeof(NBC_Allreduce_args));
|
||||
args->sendbuf=sendbuf;
|
||||
args->recvbuf=recvbuf;
|
||||
args->count=count;
|
||||
args->datatype=datatype;
|
||||
args->op=op;
|
||||
args->schedule=schedule;
|
||||
res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_ALLREDUCE], args, args, 0);
|
||||
if(res != 0) printf("error in dict_insert() (%i)\n", res);
|
||||
/* increase number of elements for A2A */
|
||||
if(++handle->comminfo->NBC_Dict_size[NBC_ALLREDUCE] > SCHED_DICT_UPPER) {
|
||||
NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_ALLREDUCE], &handle->comminfo->NBC_Dict_size[NBC_ALLREDUCE]);
|
||||
}
|
||||
} else {
|
||||
/* found schedule */
|
||||
schedule=found->schedule;
|
||||
}
|
||||
#endif
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
/* tmpbuf is freed with the handle */
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
|
||||
/* binomial allreduce (binomial tree up and binomial bcast down)
|
||||
* working principle:
|
||||
* - each node gets a virtual rank vrank
|
||||
* - the 'root' node get vrank 0
|
||||
* - node 0 gets the vrank of the 'root'
|
||||
* - all other ranks stay identical (they do not matter)
|
||||
*
|
||||
* Algorithm:
|
||||
* pairwise exchange
|
||||
* round r:
|
||||
* grp = rank % 2^r
|
||||
* if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value
|
||||
* if grp == 1: send to rank - 2^(r-1) and exit function
|
||||
*
|
||||
* do this for R=log_2(p) rounds
|
||||
* followed by a Bcast:
|
||||
* Algorithm:
|
||||
* - each node with vrank > 2^r and vrank < 2^r+1 receives from node
|
||||
* vrank - 2^r (vrank=1 receives from 0, vrank 0 receives never)
|
||||
* - each node sends each round r to node vrank + 2^r
|
||||
* - a node stops to send if 2^r > commsize
|
||||
*
|
||||
*/
|
||||
#define RANK2VRANK(rank, vrank, root) \
|
||||
{ \
|
||||
vrank = rank; \
|
||||
if (rank == 0) vrank = root; \
|
||||
if (rank == root) vrank = 0; \
|
||||
}
|
||||
#define VRANK2RANK(rank, vrank, root) \
|
||||
{ \
|
||||
rank = vrank; \
|
||||
if (vrank == 0) rank = root; \
|
||||
if (vrank == root) rank = 0; \
|
||||
}
|
||||
static __inline__ int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle) {
|
||||
int root, vrank, r, maxr, firstred, vpeer, peer, res;
|
||||
|
||||
root = 0; /* this makes the code for ireduce and iallreduce nearly identical - could be changed to improve performance */
|
||||
RANK2VRANK(rank, vrank, root);
|
||||
maxr = (int)ceil((log(p)/LOG2));
|
||||
|
||||
firstred = 1;
|
||||
for(r=1; r<=maxr; r++) {
|
||||
if((vrank % (1<<r)) == 0) {
|
||||
/* we have to receive this round */
|
||||
vpeer = vrank + (1<<(r-1));
|
||||
VRANK2RANK(peer, vpeer, root)
|
||||
if(peer<p) {
|
||||
res = NBC_Sched_recv(0, true, count, datatype, peer, schedule);
|
||||
if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
/* we have to wait until we have the data */
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
if(firstred) {
|
||||
/* perform the reduce with the senbuf */
|
||||
res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule);
|
||||
firstred = 0;
|
||||
} else {
|
||||
/* perform the reduce in my local buffer */
|
||||
res = NBC_Sched_op(recvbuf, false, recvbuf, false, 0, true, count, datatype, op, schedule);
|
||||
}
|
||||
if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; }
|
||||
/* this cannot be done until handle->tmpbuf is unused :-( */
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
}
|
||||
} else {
|
||||
/* we have to send this round */
|
||||
vpeer = vrank - (1<<(r-1));
|
||||
VRANK2RANK(peer, vpeer, root)
|
||||
if(firstred) {
|
||||
/* we have to use the sendbuf in the first round .. */
|
||||
res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule);
|
||||
} else {
|
||||
/* and the recvbuf in all remeining rounds */
|
||||
res = NBC_Sched_send(recvbuf, false, count, datatype, peer, schedule);
|
||||
}
|
||||
if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
/* leave the game */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* this is the Bcast part - copied with minor changes from nbc_ibcast.c
|
||||
* changed: buffer -> recvbuf */
|
||||
RANK2VRANK(rank, vrank, root);
|
||||
|
||||
/* receive from the right hosts */
|
||||
if(vrank != 0) {
|
||||
for(r=0; r<maxr; r++) {
|
||||
if((vrank >= (1<<r)) && (vrank < (1<<(r+1)))) {
|
||||
VRANK2RANK(peer, vrank-(1<<r), root);
|
||||
res = NBC_Sched_recv(recvbuf, false, count, datatype, peer, schedule);
|
||||
if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if(NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
/* now send to the right hosts */
|
||||
for(r=0; r<maxr; r++) {
|
||||
if(((vrank + (1<<r) < p) && (vrank < (1<<r))) || (vrank == 0)) {
|
||||
VRANK2RANK(peer, vrank+(1<<r), root);
|
||||
res = NBC_Sched_send(recvbuf, false, count, datatype, peer, schedule);
|
||||
if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
/* end of the bcast */
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
static __inline__ int allred_sched_chain(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize) {
|
||||
int res, rrpeer, rbpeer, srpeer, sbpeer, numfrag, fragnum, fragcount, thiscount, bstart, bend;
|
||||
long roffset, boffset;
|
||||
|
||||
/* reduce peers */
|
||||
rrpeer = rank+1;
|
||||
srpeer = rank-1;
|
||||
/* bcast peers */
|
||||
rbpeer = rank-1;
|
||||
sbpeer = rank+1;
|
||||
|
||||
if(count == 0) return NBC_OK;
|
||||
|
||||
numfrag = count*size/fragsize;
|
||||
if((count*size)%fragsize != 0) numfrag++;
|
||||
fragcount = count/numfrag;
|
||||
|
||||
/* determine the starting round of bcast ... the first reduced packet
|
||||
* is after p-1 rounds at rank 0 and will be sent back ... */
|
||||
bstart = p-1+rank;
|
||||
/* determine the ending round of bcast ... after arrival of the first
|
||||
* packet, each rank has to forward numfrag packets */
|
||||
bend = bstart+numfrag;
|
||||
//printf("[%i] numfrag: %i, count: %i, size: %i, fragcount: %i, bstart: %i, bend: %i\n", rank, numfrag, count, size, fragcount, bstart, bend);
|
||||
|
||||
/* this are two loops in one - this is a little nasty :-( */
|
||||
for(fragnum = 0; fragnum < bend; fragnum++) {
|
||||
roffset = fragnum*fragcount*ext;
|
||||
boffset = (fragnum-bstart)*fragcount*ext;
|
||||
thiscount = fragcount;
|
||||
|
||||
/* first numfrag rounds ... REDUCE to rank 0 */
|
||||
if(fragnum < numfrag) {
|
||||
if(fragnum == numfrag-1) {
|
||||
/* last fragment may not be full */
|
||||
thiscount = count-fragcount*fragnum;
|
||||
}
|
||||
//printf("[%i] reduce %i elements from %lu\n", rank, thiscount, roffset);
|
||||
|
||||
/* REDUCE - PART last node does not recv */
|
||||
if(rank != p-1) {
|
||||
res = NBC_Sched_recv((char*)roffset, true, thiscount, datatype, rrpeer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
/* root reduces into receivebuf */
|
||||
if(rank == 0) {
|
||||
res = NBC_Sched_op((char*)recvbuf+roffset, false, (char*)sendbuf+roffset, false, (char*)roffset, true, thiscount, datatype, op, schedule);
|
||||
} else {
|
||||
res = NBC_Sched_op((char*)roffset, true, (char*)sendbuf+roffset, false, (char*)roffset, true, thiscount, datatype, op, schedule);
|
||||
}
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
}
|
||||
|
||||
/* REDUCE PART root does not send */
|
||||
if(rank != 0) {
|
||||
/* rank p-1 has to send out of sendbuffer :) */
|
||||
if(rank == p-1) {
|
||||
res = NBC_Sched_send((char*)sendbuf+roffset, false, thiscount, datatype, srpeer, schedule);
|
||||
} else {
|
||||
res = NBC_Sched_send((char*)roffset, true, thiscount, datatype, srpeer, schedule);
|
||||
}
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
/* this barrier here seems awkward but isn't!!!! */
|
||||
//res = NBC_Sched_barrier(schedule);
|
||||
}
|
||||
}
|
||||
|
||||
/* BCAST from rank 0 */
|
||||
if(fragnum >= bstart) {
|
||||
//printf("[%i] bcast %i elements from %lu\n", rank, thiscount, boffset);
|
||||
if(fragnum == bend-1) {
|
||||
/* last fragment may not be full */
|
||||
thiscount = count-fragcount*(fragnum-bstart);
|
||||
}
|
||||
|
||||
/* BCAST PART root does not receive */
|
||||
if(rank != 0) {
|
||||
res = NBC_Sched_recv((char*)recvbuf+boffset, false, thiscount, datatype, rbpeer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
}
|
||||
|
||||
/* BCAST PART last rank does not send */
|
||||
if(rank != p-1) {
|
||||
res = NBC_Sched_send((char*)recvbuf+boffset, false, thiscount, datatype, sbpeer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//NBC_PRINT_SCHED(*schedule);
|
||||
|
||||
return NBC_OK;
|
||||
}
|
300
ompi/mca/coll/libnbc/nbc_ialltoall.c
Обычный файл
300
ompi/mca/coll/libnbc/nbc_ialltoall.c
Обычный файл
@ -0,0 +1,300 @@
|
||||
#include "nbc.h"
|
||||
|
||||
static __inline__ int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm);
|
||||
static __inline__ int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm);
|
||||
static __inline__ int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle);
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* tree comparison function for schedule cache */
|
||||
int NBC_Alltoall_args_compare(NBC_Alltoall_args *a, NBC_Alltoall_args *b, void *param) {
|
||||
|
||||
if( (a->sendbuf == b->sendbuf) &&
|
||||
(a->sendcount == b->sendcount) &&
|
||||
(a->sendtype == b->sendtype) &&
|
||||
(a->recvbuf == b->recvbuf) &&
|
||||
(a->recvcount == b->recvcount) &&
|
||||
(a->recvtype == b->recvtype) ) {
|
||||
return 0;
|
||||
}
|
||||
if( a->sendbuf < b->sendbuf ) {
|
||||
return -1;
|
||||
}
|
||||
return +1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* simple linear MPI_Ialltoall the (simple) algorithm just sends to all nodes */
|
||||
int NBC_Ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle) {
|
||||
int rank, p, res, a2asize, sndsize, datasize;
|
||||
NBC_Schedule *schedule;
|
||||
MPI_Aint rcvext, sndext;
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
NBC_Alltoall_args *args, *found, search;
|
||||
#endif
|
||||
char *rbuf, *sbuf, inplace;
|
||||
enum {NBC_A2A_LINEAR, NBC_A2A_PAIRWISE, NBC_A2A_DISS} alg;
|
||||
|
||||
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(sendtype, &sndext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(recvtype, &rcvext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
res = MPI_Type_size(sendtype, &sndsize);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; }
|
||||
|
||||
/* algorithm selection */
|
||||
a2asize = sndsize*sendcount*p;
|
||||
/* this number is optimized for TCP on odin.cs.indiana.edu */
|
||||
if((p <= 8) && ((a2asize < 1<<17) || (sndsize*sendcount < 1<<12))) {
|
||||
/* just send as fast as we can if we have less than 8 peers, if the
|
||||
* total communicated size is smaller than 1<<17 *and* if we don't
|
||||
* have eager messages (msgsize < 1<<13) */
|
||||
alg = NBC_A2A_LINEAR;
|
||||
} else if(a2asize < (1<<12)*p) {
|
||||
alg = NBC_A2A_DISS;
|
||||
} else
|
||||
alg = NBC_A2A_LINEAR; /*NBC_A2A_PAIRWISE*/;
|
||||
|
||||
if(!inplace) {
|
||||
/* copy my data to receive buffer */
|
||||
rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext);
|
||||
sbuf = ((char *)sendbuf) + (rank*sendcount*sndext);
|
||||
res = NBC_Copy(sbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
/* allocate temp buffer if we need one */
|
||||
if(alg == NBC_A2A_DISS) {
|
||||
/* only A2A_DISS needs buffers */
|
||||
if(NBC_Type_intrinsic(sendtype)) {
|
||||
datasize = sndext*sendcount;
|
||||
} else {
|
||||
res = MPI_Pack_size(sendcount, sendtype, comm, &datasize);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; }
|
||||
}
|
||||
/* allocate temporary buffers */
|
||||
if(p % 2 == 0) {
|
||||
handle->tmpbuf=malloc(datasize*p*2);
|
||||
} else {
|
||||
/* we cannot divide p by two, so alloc more to be safe ... */
|
||||
handle->tmpbuf=malloc(datasize*(p/2+1)*2*2);
|
||||
}
|
||||
|
||||
/* phase 1 - rotate n data blocks upwards into the tmpbuffer */
|
||||
if(NBC_Type_intrinsic(sendtype)) {
|
||||
/* contiguous - just copy (1st copy) */
|
||||
memcpy(handle->tmpbuf, (char*)sendbuf+datasize*rank, datasize*(p-rank));
|
||||
if(rank != 0) memcpy((char*)handle->tmpbuf+datasize*(p-rank), sendbuf, datasize*(rank));
|
||||
} else {
|
||||
int pos=0;
|
||||
|
||||
/* non-contiguous - pack */
|
||||
res = MPI_Pack((char*)sendbuf+rank*sendcount*sndext, (p-rank)*sendcount, sendtype, handle->tmpbuf, (p-rank)*datasize, &pos, comm);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; }
|
||||
if(rank != 0) {
|
||||
pos = 0;
|
||||
MPI_Pack(sendbuf, rank*sendcount, sendtype, (char*)handle->tmpbuf+datasize*(p-rank), rank*datasize, &pos, comm);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
} else {
|
||||
handle->tmpbuf=NULL;
|
||||
}
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* search schedule in communicator specific tree */
|
||||
search.sendbuf=sendbuf;
|
||||
search.sendcount=sendcount;
|
||||
search.sendtype=sendtype;
|
||||
search.recvbuf=recvbuf;
|
||||
search.recvcount=recvcount;
|
||||
search.recvtype=recvtype;
|
||||
found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_ALLTOALL], &search);
|
||||
if(found == NULL) {
|
||||
#endif
|
||||
/* not found - generate new schedule */
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc()\n"); return res; }
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }
|
||||
|
||||
switch(alg) {
|
||||
case NBC_A2A_LINEAR:
|
||||
res = a2a_sched_linear(rank, p, sndext, rcvext, schedule, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
|
||||
break;
|
||||
case NBC_A2A_DISS:
|
||||
res = a2a_sched_diss(rank, p, sndext, rcvext, schedule, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, handle);
|
||||
break;
|
||||
case NBC_A2A_PAIRWISE:
|
||||
res = a2a_sched_pairwise(rank, p, sndext, rcvext, schedule, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
|
||||
break;
|
||||
}
|
||||
|
||||
if (NBC_OK != res) { return res; }
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* save schedule to tree */
|
||||
args = malloc(sizeof(NBC_Alltoall_args));
|
||||
args->sendbuf=sendbuf;
|
||||
args->sendcount=sendcount;
|
||||
args->sendtype=sendtype;
|
||||
args->recvbuf=recvbuf;
|
||||
args->recvcount=recvcount;
|
||||
args->recvtype=recvtype;
|
||||
args->schedule=schedule;
|
||||
res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_ALLTOALL], args, args, 0);
|
||||
if(res != 0) printf("error in dict_insert() (%i)\n", res);
|
||||
/* increase number of elements for A2A */
|
||||
if(++handle->comminfo->NBC_Dict_size[NBC_ALLTOALL] > SCHED_DICT_UPPER) {
|
||||
NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_ALLTOALL], &handle->comminfo->NBC_Dict_size[NBC_ALLTOALL]);
|
||||
/*if(!rank) printf("[%i] removing %i elements - new size: %i \n", rank, SCHED_DICT_UPPER-SCHED_DICT_LOWER, handle->comminfo->NBC_Alltoall_size);*/
|
||||
}
|
||||
/*if(!rank) printf("[%i] added new schedule to tree - number %i\n", rank, handle->comminfo->NBC_Dict_size[NBC_ALLTOALL]);*/
|
||||
} else {
|
||||
/* found schedule */
|
||||
schedule=found->schedule;
|
||||
}
|
||||
#endif
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
static __inline__ int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) {
|
||||
int res, r, sndpeer, rcvpeer;
|
||||
char *rbuf, *sbuf;
|
||||
|
||||
res = NBC_OK;
|
||||
if(p < 2) return res;
|
||||
|
||||
for(r=1;r<p;r++) {
|
||||
|
||||
sndpeer = (rank+r)%p;
|
||||
rcvpeer = (rank-r+p)%p;
|
||||
|
||||
rbuf = ((char *) recvbuf) + (rcvpeer*recvcount*rcvext);
|
||||
res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, rcvpeer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
sbuf = ((char *) sendbuf) + (sndpeer*sendcount*sndext);
|
||||
res = NBC_Sched_send(sbuf, false, sendcount, sendtype, sndpeer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
|
||||
if (r < p) {
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_barr() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static __inline__ int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) {
|
||||
int res, r;
|
||||
char *rbuf, *sbuf;
|
||||
|
||||
res = NBC_OK;
|
||||
|
||||
for(r=0;r<p;r++) {
|
||||
/* easy algorithm */
|
||||
if ((r == rank)) { continue; }
|
||||
rbuf = ((char *) recvbuf) + (r*recvcount*rcvext);
|
||||
res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, r, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
sbuf = ((char *) sendbuf) + (r*sendcount*sndext);
|
||||
res = NBC_Sched_send(sbuf, false, sendcount, sendtype, r, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static __inline__ int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle) {
|
||||
int res, i, r, speer, rpeer, pos, datasize, offset, virtp;
|
||||
char *rbuf, *rtmpbuf, *stmpbuf;
|
||||
|
||||
res = NBC_OK;
|
||||
if(p < 2) return res;
|
||||
|
||||
if(NBC_Type_intrinsic(sendtype)) {
|
||||
datasize = sndext*sendcount;
|
||||
} else {
|
||||
res = MPI_Pack_size(sendcount, sendtype, comm, &datasize);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
/* allocate temporary buffers */
|
||||
if(p % 2 == 0) {
|
||||
rtmpbuf = (char*)handle->tmpbuf+datasize*p;
|
||||
stmpbuf = (char*)handle->tmpbuf+datasize*(p+p/2);
|
||||
} else {
|
||||
/* we cannot divide p by two, so alloc more to be safe ... */
|
||||
virtp = (p/2+1)*2;
|
||||
rtmpbuf = (char*)handle->tmpbuf+datasize*p;
|
||||
stmpbuf = (char*)handle->tmpbuf+datasize*(p+virtp/2);
|
||||
}
|
||||
|
||||
/* phase 2 - communicate */
|
||||
/*printf("[%i] temp buffer is at %lu of size %i, maxround: %i\n", rank, (unsigned long)handle->tmpbuf, (int)datasize*p*(1<<maxround), maxround);*/
|
||||
for(r = 1; r < p; r<<=1) {
|
||||
offset = 0;
|
||||
for(i=1; i<p; i++) {
|
||||
/* test if bit r is set in rank number i */
|
||||
if((i&r) == r) {
|
||||
/* copy data to sendbuffer (2nd copy) - could be avoided using iovecs */
|
||||
/*printf("[%i] round %i: copying element %i to buffer %lu\n", rank, r, i, (unsigned long)(stmpbuf+offset));*/
|
||||
NBC_Sched_copy((void*)(long)(i*datasize), true, datasize, MPI_BYTE, stmpbuf+offset-(unsigned long)handle->tmpbuf, true, datasize, MPI_BYTE, schedule);
|
||||
offset += datasize;
|
||||
}
|
||||
}
|
||||
|
||||
speer = ( rank + r) % p;
|
||||
/* add p because modulo does not work with negative values */
|
||||
rpeer = ((rank - r)+p) % p;
|
||||
|
||||
/*printf("[%i] receiving %i bytes from host %i into rbuf %lu\n", rank, offset, rpeer, (unsigned long)rtmpbuf);*/
|
||||
res = NBC_Sched_recv(rtmpbuf-(unsigned long)handle->tmpbuf, true, offset, MPI_BYTE, rpeer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
|
||||
/*printf("[%i] sending %i bytes to host %i from sbuf %lu\n", rank, offset, speer, (unsigned long)stmpbuf);*/
|
||||
res = NBC_Sched_send(stmpbuf-(unsigned long)handle->tmpbuf, true, offset, MPI_BYTE, speer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
|
||||
/* unpack from buffer */
|
||||
offset = 0;
|
||||
for(i=1; i<p; i++) {
|
||||
/* test if bit r is set in rank number i */
|
||||
if((i&r) == r) {
|
||||
/* copy data to tmpbuffer (3rd copy) - could be avoided using iovecs */
|
||||
NBC_Sched_copy(rtmpbuf+offset-(unsigned long)handle->tmpbuf, true, datasize, MPI_BYTE, (void*)(long)(i*datasize), true, datasize, MPI_BYTE, schedule);
|
||||
offset += datasize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* phase 3 - reorder - data is now in wrong order in handle->tmpbuf -
|
||||
* reorder it into recvbuf */
|
||||
for(i=0; i<p; i++) {
|
||||
rbuf = (char*)recvbuf+((rank-i+p)%p)*recvcount*rcvext;
|
||||
pos = 0;
|
||||
res = NBC_Sched_unpack((void*)(long)(i*datasize), true, recvcount, recvtype, rbuf, false, schedule);
|
||||
if (NBC_OK != res) { printf("MPI Error in NBC_Sched_pack() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
return NBC_OK;
|
||||
}
|
67
ompi/mca/coll/libnbc/nbc_ialltoallv.c
Обычный файл
67
ompi/mca/coll/libnbc/nbc_ialltoallv.c
Обычный файл
@ -0,0 +1,67 @@
|
||||
#include "nbc.h"
|
||||
|
||||
/* an alltoallv schedule can not be cached easily because the contents
|
||||
* ot the recvcounts array may change, so a comparison of the address
|
||||
* would not be sufficient ... we simply do not cache it */
|
||||
|
||||
/* simple linear Alltoallv */
|
||||
int NBC_Ialltoallv(void* sendbuf, int *sendcounts, int *sdispls,
|
||||
MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *rdispls,
|
||||
MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle* handle) {
|
||||
|
||||
int rank, p, res, i;
|
||||
MPI_Aint sndext, rcvext;
|
||||
NBC_Schedule *schedule;
|
||||
char *rbuf, *sbuf, inplace;
|
||||
|
||||
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res= MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(sendtype, &sndext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(recvtype, &rcvext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; }
|
||||
|
||||
handle->tmpbuf=NULL;
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }
|
||||
|
||||
/* copy data to receivbuffer */
|
||||
if((sendcounts[rank] != 0) && !inplace) {
|
||||
rbuf = ((char *) recvbuf) + (rdispls[rank] * rcvext);
|
||||
sbuf = ((char *) sendbuf) + (sdispls[rank] * sndext);
|
||||
res = NBC_Copy(sbuf, sendcounts[rank], sendtype, rbuf, recvcounts[rank], recvtype, comm);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
for (i = 0; i < p; i++) {
|
||||
if (i == rank) { continue; }
|
||||
/* post all sends */
|
||||
sbuf = ((char *) sendbuf) + (sdispls[i] * sndext);
|
||||
res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
/* post all receives */
|
||||
rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext);
|
||||
res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
/*NBC_PRINT_SCHED(*schedule);*/
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
75
ompi/mca/coll/libnbc/nbc_ibarrier.c
Обычный файл
75
ompi/mca/coll/libnbc/nbc_ibarrier.c
Обычный файл
@ -0,0 +1,75 @@
|
||||
#include "nbc.h"
|
||||
|
||||
/* Dissemination implementation of MPI_Ibarrier */
|
||||
int NBC_Ibarrier(MPI_Comm comm, NBC_Handle* handle) {
|
||||
int round, rank, p, maxround, res, recvpeer, sendpeer;
|
||||
NBC_Schedule *schedule;
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
|
||||
|
||||
handle->tmpbuf=NULL;
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* there only one argument set per communicator -> hang it directly at
|
||||
* the tree-position, NBC_Dict_size[...] is 0 for not initialized and
|
||||
* 1 for initialized. NBC_Dict[...] is a pointer to the schedule in
|
||||
* this case */
|
||||
if(handle->comminfo->NBC_Dict_size[NBC_BARRIER] == 0) {
|
||||
/* we did not init it yet */
|
||||
#endif
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc()\n"); return res; }
|
||||
|
||||
round = -1;
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }
|
||||
|
||||
maxround = (int)ceil((log(p)/LOG2)-1);
|
||||
|
||||
do {
|
||||
round++;
|
||||
sendpeer = (rank + (1<<round)) % p;
|
||||
/* add p because modulo does not work with negative values */
|
||||
recvpeer = ((rank - (1<<round))+p) % p;
|
||||
|
||||
/* send msg to sendpeer */
|
||||
res = NBC_Sched_send(NULL, false, 0, MPI_BYTE, sendpeer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
|
||||
/* recv msg from recvpeer */
|
||||
res = NBC_Sched_recv(NULL, false, 0, MPI_BYTE, recvpeer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
/* end communication round */
|
||||
if(round < maxround){
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
}
|
||||
} while (round < maxround);
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* add it */
|
||||
handle->comminfo->NBC_Dict[NBC_BARRIER] = (hb_tree*)schedule;
|
||||
handle->comminfo->NBC_Dict_size[NBC_BARRIER] = 1;
|
||||
} else {
|
||||
/* we found it */
|
||||
schedule = (NBC_Schedule*)handle->comminfo->NBC_Dict[NBC_BARRIER];
|
||||
}
|
||||
#endif
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/*void NBC_IBARRIER(MPI_Fint *comm, MPI_Fint *ierr) {
|
||||
*ierr = NBC_Ibarrier(MPI_Comm comm, NBC_Handle* handle);
|
||||
}*/
|
234
ompi/mca/coll/libnbc/nbc_ibcast.c
Обычный файл
234
ompi/mca/coll/libnbc/nbc_ibcast.c
Обычный файл
@ -0,0 +1,234 @@
|
||||
#include "nbc.h"
|
||||
|
||||
static __inline__ int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype);
|
||||
static __inline__ int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype);
|
||||
static __inline__ int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype, int fragsize, int size);
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* tree comparison function for schedule cache */
|
||||
int NBC_Bcast_args_compare(NBC_Bcast_args *a, NBC_Bcast_args *b, void *param) {
|
||||
|
||||
if( (a->buffer == b->buffer) &&
|
||||
(a->count == b->count) &&
|
||||
(a->datatype == b->datatype) &&
|
||||
(a->root == b->root) ) {
|
||||
return 0;
|
||||
}
|
||||
if( a->buffer < b->buffer ) {
|
||||
return -1;
|
||||
}
|
||||
return +1;
|
||||
}
|
||||
#endif
|
||||
|
||||
int NBC_Ibcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, NBC_Handle* handle) {
|
||||
int rank, p, res, size, segsize;
|
||||
NBC_Schedule *schedule;
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
NBC_Bcast_args *args, *found, search;
|
||||
#endif
|
||||
enum { NBC_BCAST_LINEAR, NBC_BCAST_BINOMIAL, NBC_BCAST_CHAIN } alg;
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Type_size(datatype, &size);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; }
|
||||
|
||||
/* algorithm selection */
|
||||
if(p <= 4) {
|
||||
alg = NBC_BCAST_LINEAR;
|
||||
} else if(size*count < 65536) {
|
||||
alg = NBC_BCAST_BINOMIAL;
|
||||
} else if(size*count < 524288) {
|
||||
alg = NBC_BCAST_CHAIN;
|
||||
segsize = 16384/2;
|
||||
} else {
|
||||
alg = NBC_BCAST_CHAIN;
|
||||
segsize = 65536/2;
|
||||
}
|
||||
|
||||
handle->tmpbuf=NULL;
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* search schedule in communicator specific tree */
|
||||
search.buffer=buffer;
|
||||
search.count=count;
|
||||
search.datatype=datatype;
|
||||
search.root=root;
|
||||
found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_BCAST], &search);
|
||||
if(found == NULL) {
|
||||
#endif
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; }
|
||||
|
||||
switch(alg) {
|
||||
case NBC_BCAST_LINEAR:
|
||||
res = bcast_sched_linear(rank, p, root, schedule, buffer, count, datatype);
|
||||
break;
|
||||
case NBC_BCAST_BINOMIAL:
|
||||
res = bcast_sched_binomial(rank, p, root, schedule, buffer, count, datatype);
|
||||
break;
|
||||
case NBC_BCAST_CHAIN:
|
||||
res = bcast_sched_chain(rank, p, root, schedule, buffer, count, datatype, segsize, size);
|
||||
break;
|
||||
}
|
||||
if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; }
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* save schedule to tree */
|
||||
args = malloc(sizeof(NBC_Bcast_args));
|
||||
args->buffer=buffer;
|
||||
args->count=count;
|
||||
args->datatype=datatype;
|
||||
args->root=root;
|
||||
args->schedule=schedule;
|
||||
res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_BCAST], args, args, 0);
|
||||
if(res != 0) printf("error in dict_insert() (%i)\n", res);
|
||||
/* increase number of elements for A2A */
|
||||
if(++handle->comminfo->NBC_Dict_size[NBC_BCAST] > SCHED_DICT_UPPER) {
|
||||
NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_BCAST], &handle->comminfo->NBC_Dict_size[NBC_BCAST]);
|
||||
}
|
||||
} else {
|
||||
/* found schedule */
|
||||
schedule=found->schedule;
|
||||
}
|
||||
#endif
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* better binomial bcast
|
||||
* working principle:
|
||||
* - each node gets a virtual rank vrank
|
||||
* - the 'root' node get vrank 0
|
||||
* - node 0 gets the vrank of the 'root'
|
||||
* - all other ranks stay identical (they do not matter)
|
||||
*
|
||||
* Algorithm:
|
||||
* - each node with vrank > 2^r and vrank < 2^r+1 receives from node
|
||||
* vrank - 2^r (vrank=1 receives from 0, vrank 0 receives never)
|
||||
* - each node sends each round r to node vrank + 2^r
|
||||
* - a node stops to send if 2^r > commsize
|
||||
*/
|
||||
#define RANK2VRANK(rank, vrank, root) \
|
||||
{ \
|
||||
vrank = rank; \
|
||||
if (rank == 0) vrank = root; \
|
||||
if (rank == root) vrank = 0; \
|
||||
}
|
||||
#define VRANK2RANK(rank, vrank, root) \
|
||||
{ \
|
||||
rank = vrank; \
|
||||
if (vrank == 0) rank = root; \
|
||||
if (vrank == root) rank = 0; \
|
||||
}
|
||||
static __inline__ int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype) {
|
||||
int maxr, vrank, peer, r, res;
|
||||
|
||||
maxr = (int)ceil((log(p)/LOG2));
|
||||
|
||||
RANK2VRANK(rank, vrank, root);
|
||||
|
||||
/* receive from the right hosts */
|
||||
if(vrank != 0) {
|
||||
for(r=0; r<maxr; r++) {
|
||||
if((vrank >= (1<<r)) && (vrank < (1<<(r+1)))) {
|
||||
VRANK2RANK(peer, vrank-(1<<r), root);
|
||||
res = NBC_Sched_recv(buffer, false, count, datatype, peer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
/* now send to the right hosts */
|
||||
for(r=0; r<maxr; r++) {
|
||||
if(((vrank + (1<<r) < p) && (vrank < (1<<r))) || (vrank == 0)) {
|
||||
VRANK2RANK(peer, vrank+(1<<r), root);
|
||||
res = NBC_Sched_send(buffer, false, count, datatype, peer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* simple linear MPI_Ibcast */
|
||||
static __inline__ int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype) {
|
||||
int peer, res;
|
||||
|
||||
/* send to all others */
|
||||
if(rank == root) {
|
||||
for (peer=0; peer<p;peer++) {
|
||||
if(peer != root) {
|
||||
/* send msg to peer */
|
||||
res = NBC_Sched_send(buffer, false, count, datatype, peer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* recv msg from root */
|
||||
res = NBC_Sched_recv(buffer, false, count, datatype, root, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* simple chained MPI_Ibcast */
|
||||
static __inline__ int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype, int fragsize, int size) {
|
||||
int res, vrank, rpeer, speer, numfrag, fragnum, fragcount, thiscount;
|
||||
MPI_Aint ext;
|
||||
char *buf;
|
||||
|
||||
RANK2VRANK(rank, vrank, root);
|
||||
VRANK2RANK(rpeer, vrank-1, root);
|
||||
VRANK2RANK(speer, vrank+1, root);
|
||||
res = MPI_Type_extent(datatype, &ext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
|
||||
if(count == 0) return NBC_OK;
|
||||
|
||||
numfrag = count*size/fragsize;
|
||||
if((count*size)%fragsize != 0) numfrag++;
|
||||
fragcount = count/numfrag;
|
||||
//if(!rank) printf("numfrag: %i, count: %i, size: %i, fragcount: %i\n", numfrag, count, size, fragcount);
|
||||
|
||||
for(fragnum = 0; fragnum < numfrag; fragnum++) {
|
||||
buf = (char*)buffer+fragnum*fragcount*ext;
|
||||
thiscount = fragcount;
|
||||
if(fragnum == numfrag-1) {
|
||||
/* last fragment may not be full */
|
||||
thiscount = count-fragcount*fragnum;
|
||||
}
|
||||
|
||||
/* root does not receive */
|
||||
if(vrank != 0) {
|
||||
res = NBC_Sched_recv(buf, false, thiscount, datatype, rpeer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
}
|
||||
|
||||
/* last rank does not send */
|
||||
if(vrank != p-1) {
|
||||
res = NBC_Sched_send(buf, false, thiscount, datatype, speer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
/* this barrier here seems awaward but isn't!!!! */
|
||||
if(vrank == 0) res = NBC_Sched_barrier(schedule);
|
||||
}
|
||||
}
|
||||
|
||||
return NBC_OK;
|
||||
}
|
117
ompi/mca/coll/libnbc/nbc_igather.c
Обычный файл
117
ompi/mca/coll/libnbc/nbc_igather.c
Обычный файл
@ -0,0 +1,117 @@
|
||||
#include "nbc.h"
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* tree comparison function for schedule cache */
|
||||
int NBC_Gather_args_compare(NBC_Gather_args *a, NBC_Gather_args *b, void *param) {
|
||||
|
||||
if( (a->sendbuf == b->sendbuf) &&
|
||||
(a->sendcount == b->sendcount) &&
|
||||
(a->sendtype == b->sendtype) &&
|
||||
(a->recvbuf == b->recvbuf) &&
|
||||
(a->recvcount == b->recvcount) &&
|
||||
(a->recvtype == b->recvtype) &&
|
||||
(a->root == b->root) ) {
|
||||
return 0;
|
||||
}
|
||||
if( a->sendbuf < b->sendbuf ) {
|
||||
return -1;
|
||||
}
|
||||
return +1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* simple linear MPI_Igather */
|
||||
int NBC_Igather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle) {
|
||||
int rank, p, res, i;
|
||||
MPI_Aint rcvext;
|
||||
NBC_Schedule *schedule;
|
||||
char *rbuf, inplace;
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
NBC_Gather_args *args, *found, search;
|
||||
#endif
|
||||
|
||||
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(recvtype, &rcvext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
|
||||
handle->tmpbuf = NULL;
|
||||
|
||||
if((rank == root) && (!inplace)) {
|
||||
rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext);
|
||||
/* if I am the root - just copy the message (only without MPI_IN_PLACE) */
|
||||
res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* search schedule in communicator specific tree */
|
||||
search.sendbuf=sendbuf;
|
||||
search.sendcount=sendcount;
|
||||
search.sendtype=sendtype;
|
||||
search.recvbuf=recvbuf;
|
||||
search.recvcount=recvcount;
|
||||
search.recvtype=recvtype;
|
||||
search.root=root;
|
||||
found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_GATHER], &search);
|
||||
if(found == NULL) {
|
||||
#endif
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; }
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }
|
||||
|
||||
/* send to root */
|
||||
if(rank != root) {
|
||||
/* send msg to root */
|
||||
res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
} else {
|
||||
for(i=0;i<p;i++) {
|
||||
rbuf = ((char *)recvbuf) + (i*recvcount*rcvext);
|
||||
if(i != root) {
|
||||
/* root receives message to the right buffer */
|
||||
res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, i, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* save schedule to tree */
|
||||
args = malloc(sizeof(NBC_Gather_args));
|
||||
args->sendbuf=sendbuf;
|
||||
args->sendcount=sendcount;
|
||||
args->sendtype=sendtype;
|
||||
args->recvbuf=recvbuf;
|
||||
args->recvcount=recvcount;
|
||||
args->recvtype=recvtype;
|
||||
args->root=root;
|
||||
args->schedule=schedule;
|
||||
res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_GATHER], args, args, 0);
|
||||
if(res != 0) printf("error in dict_insert() (%i)\n", res);
|
||||
/* increase number of elements for A2A */
|
||||
if(++handle->comminfo->NBC_Dict_size[NBC_GATHER] > SCHED_DICT_UPPER) {
|
||||
NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_GATHER], &handle->comminfo->NBC_Dict_size[NBC_GATHER]);
|
||||
}
|
||||
} else {
|
||||
/* found schedule */
|
||||
schedule=found->schedule;
|
||||
}
|
||||
#endif
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
62
ompi/mca/coll/libnbc/nbc_igatherv.c
Обычный файл
62
ompi/mca/coll/libnbc/nbc_igatherv.c
Обычный файл
@ -0,0 +1,62 @@
|
||||
#include "nbc.h"
|
||||
|
||||
/* an gatherv schedule can not be cached easily because the contents
|
||||
* ot the recvcounts array may change, so a comparison of the address
|
||||
* would not be sufficient ... we simply do not cache it */
|
||||
|
||||
/* simple linear MPI_Igatherv */
|
||||
int NBC_Igatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle) {
|
||||
int rank, p, res, i;
|
||||
MPI_Aint rcvext;
|
||||
NBC_Schedule *schedule;
|
||||
char *rbuf, inplace;
|
||||
|
||||
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(recvtype, &rcvext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; }
|
||||
|
||||
handle->tmpbuf=NULL;
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }
|
||||
|
||||
/* send to root */
|
||||
if(rank != root) {
|
||||
/* send msg to root */
|
||||
res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
} else {
|
||||
for(i=0;i<p;i++) {
|
||||
rbuf = ((char *)recvbuf) + (displs[i]*rcvext);
|
||||
if(i == root) {
|
||||
if(!inplace) {
|
||||
/* if I am the root - just copy the message */
|
||||
res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcounts[i], recvtype, comm);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
|
||||
}
|
||||
} else {
|
||||
/* root receives message to the right buffer */
|
||||
res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
277
ompi/mca/coll/libnbc/nbc_ireduce.c
Обычный файл
277
ompi/mca/coll/libnbc/nbc_ireduce.c
Обычный файл
@ -0,0 +1,277 @@
|
||||
#include "nbc.h"
|
||||
static __inline__ int red_sched_binomial(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, char *redbuf, NBC_Schedule *schedule, NBC_Handle *handle);
|
||||
static __inline__ int red_sched_chain(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize);
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* tree comparison function for schedule cache */
|
||||
int NBC_Reduce_args_compare(NBC_Reduce_args *a, NBC_Reduce_args *b, void *param) {
|
||||
|
||||
if( (a->sendbuf == b->sendbuf) &&
|
||||
(a->recvbuf == b->recvbuf) &&
|
||||
(a->count == b->count) &&
|
||||
(a->datatype == b->datatype) &&
|
||||
(a->op == b->op) &&
|
||||
(a->root == b->root) ) {
|
||||
return 0;
|
||||
}
|
||||
if( a->sendbuf < b->sendbuf ) {
|
||||
return -1;
|
||||
}
|
||||
return +1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* the non-blocking reduce */
|
||||
int NBC_Ireduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, NBC_Handle* handle) {
|
||||
int rank, p, res, segsize, size;
|
||||
MPI_Aint ext;
|
||||
NBC_Schedule *schedule;
|
||||
char *redbuf=NULL, inplace;
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
NBC_Reduce_args *args, *found, search;
|
||||
#endif
|
||||
enum { NBC_RED_BINOMIAL, NBC_RED_CHAIN } alg;
|
||||
|
||||
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(datatype, &ext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
res = MPI_Type_size(datatype, &size);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
|
||||
/* only one node -> copy data */
|
||||
if((p == 1) && !inplace) {
|
||||
res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
/* algorithm selection */
|
||||
if(p < 4 || size*count < 65536) {
|
||||
alg = NBC_RED_BINOMIAL;
|
||||
if(rank == root) {
|
||||
/* root reduces in receivebuffer */
|
||||
handle->tmpbuf = malloc(ext*count);
|
||||
} else {
|
||||
/* recvbuf may not be valid on non-root nodes */
|
||||
handle->tmpbuf = malloc(ext*count*2);
|
||||
redbuf = ((char*)handle->tmpbuf)+(ext*count);
|
||||
}
|
||||
} else {
|
||||
handle->tmpbuf = malloc(ext*count);
|
||||
alg = NBC_RED_CHAIN;
|
||||
segsize = 16384/2;
|
||||
}
|
||||
if (NULL == handle->tmpbuf) { printf("Error in malloc() (%i)\n", res); return res; }
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* search schedule in communicator specific tree */
|
||||
search.sendbuf=sendbuf;
|
||||
search.recvbuf=recvbuf;
|
||||
search.count=count;
|
||||
search.datatype=datatype;
|
||||
search.op=op;
|
||||
search.root=root;
|
||||
found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_REDUCE], &search);
|
||||
if(found == NULL) {
|
||||
#endif
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; }
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }
|
||||
|
||||
switch(alg) {
|
||||
case NBC_RED_BINOMIAL:
|
||||
res = red_sched_binomial(rank, p, root, sendbuf, recvbuf, count, datatype, op, redbuf, schedule, handle);
|
||||
break;
|
||||
case NBC_RED_CHAIN:
|
||||
res = red_sched_chain(rank, p, root, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, handle, segsize);
|
||||
break;
|
||||
}
|
||||
if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; }
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* save schedule to tree */
|
||||
args = malloc(sizeof(NBC_Alltoall_args));
|
||||
args->sendbuf=sendbuf;
|
||||
args->recvbuf=recvbuf;
|
||||
args->count=count;
|
||||
args->datatype=datatype;
|
||||
args->op=op;
|
||||
args->root=root;
|
||||
args->schedule=schedule;
|
||||
res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_REDUCE], args, args, 0);
|
||||
if(res != 0) printf("error in dict_insert() (%i)\n", res);
|
||||
/* increase number of elements for Reduce */
|
||||
if(++handle->comminfo->NBC_Dict_size[NBC_REDUCE] > SCHED_DICT_UPPER) {
|
||||
NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_REDUCE], &handle->comminfo->NBC_Dict_size[NBC_REDUCE]);
|
||||
}
|
||||
} else {
|
||||
/* found schedule */
|
||||
schedule=found->schedule;
|
||||
}
|
||||
#endif
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
/* tmpbuf is freed with the handle */
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
|
||||
/* binomial reduce
|
||||
* working principle:
|
||||
* - each node gets a virtual rank vrank
|
||||
* - the 'root' node get vrank 0
|
||||
* - node 0 gets the vrank of the 'root'
|
||||
* - all other ranks stay identical (they do not matter)
|
||||
*
|
||||
* Algorithm:
|
||||
* pairwise exchange
|
||||
* round r:
|
||||
* grp = rank % 2^r
|
||||
* if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value
|
||||
* if grp == 1: send to rank - 2^(r-1) and exit function
|
||||
*
|
||||
* do this for R=log_2(p) rounds
|
||||
*
|
||||
*/
|
||||
#define RANK2VRANK(rank, vrank, root) \
|
||||
{ \
|
||||
vrank = rank; \
|
||||
if (rank == 0) vrank = root; \
|
||||
if (rank == root) vrank = 0; \
|
||||
}
|
||||
#define VRANK2RANK(rank, vrank, root) \
|
||||
{ \
|
||||
rank = vrank; \
|
||||
if (vrank == 0) rank = root; \
|
||||
if (vrank == root) rank = 0; \
|
||||
}
|
||||
static __inline__ int red_sched_binomial(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, char *redbuf, NBC_Schedule *schedule, NBC_Handle *handle) {
|
||||
int firstred, vrank, vpeer, peer, res, maxr, r;
|
||||
|
||||
RANK2VRANK(rank, vrank, root);
|
||||
maxr = (int)ceil((log(p)/LOG2));
|
||||
|
||||
firstred = 1;
|
||||
for(r=1; r<=maxr; r++) {
|
||||
if((vrank % (1<<r)) == 0) {
|
||||
/* we have to receive this round */
|
||||
vpeer = vrank + (1<<(r-1));
|
||||
VRANK2RANK(peer, vpeer, root)
|
||||
if(peer<p) {
|
||||
res = NBC_Sched_recv(0, true, count, datatype, peer, schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
/* we have to wait until we have the data */
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
/* perform the reduce in my local buffer */
|
||||
if(firstred) {
|
||||
if(rank == root) {
|
||||
/* root is the only one who reduces in the receivebuffer
|
||||
* take data from sendbuf in first round - save copy */
|
||||
res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule);
|
||||
} else {
|
||||
/* all others may not have a receive buffer
|
||||
* take data from sendbuf in first round - save copy */
|
||||
res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, sendbuf, false, 0, true, count, datatype, op, schedule);
|
||||
}
|
||||
firstred = 0;
|
||||
} else {
|
||||
if(rank == root) {
|
||||
/* root is the only one who reduces in the receivebuffer */
|
||||
res = NBC_Sched_op(recvbuf, false, recvbuf, false, 0, true, count, datatype, op, schedule);
|
||||
} else {
|
||||
/* all others may not have a receive buffer */
|
||||
res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, redbuf-(unsigned long)handle->tmpbuf, true, 0, true, count, datatype, op, schedule);
|
||||
}
|
||||
}
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; }
|
||||
/* this cannot be done until handle->tmpbuf is unused :-( */
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
}
|
||||
} else {
|
||||
/* we have to send this round */
|
||||
vpeer = vrank - (1<<(r-1));
|
||||
VRANK2RANK(peer, vpeer, root)
|
||||
if(firstred) {
|
||||
/* we did not reduce anything */
|
||||
res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule);
|
||||
} else {
|
||||
/* we have to use the redbuf the root (which works in receivebuf) is never sending .. */
|
||||
res = NBC_Sched_send(redbuf-(unsigned long)handle->tmpbuf, true, count, datatype, peer, schedule);
|
||||
}
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
/* leave the game */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
/* chain send ... */
|
||||
static __inline__ int red_sched_chain(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize) {
|
||||
int res, vrank, rpeer, speer, numfrag, fragnum, fragcount, thiscount;
|
||||
long offset;
|
||||
|
||||
RANK2VRANK(rank, vrank, root);
|
||||
VRANK2RANK(rpeer, vrank+1, root);
|
||||
VRANK2RANK(speer, vrank-1, root);
|
||||
|
||||
if(count == 0) return NBC_OK;
|
||||
|
||||
numfrag = count*size/fragsize;
|
||||
if((count*size)%fragsize != 0) numfrag++;
|
||||
fragcount = count/numfrag;
|
||||
/*printf("numfrag: %i, count: %i, size: %i, fragcount: %i\n", numfrag, count, size, fragcount);*/
|
||||
|
||||
for(fragnum = 0; fragnum < numfrag; fragnum++) {
|
||||
offset = fragnum*fragcount*ext;
|
||||
thiscount = fragcount;
|
||||
if(fragnum == numfrag-1) {
|
||||
/* last fragment may not be full */
|
||||
thiscount = count-fragcount*fragnum;
|
||||
}
|
||||
|
||||
/* last node does not recv */
|
||||
if(vrank != p-1) {
|
||||
res = NBC_Sched_recv((char*)offset, true, thiscount, datatype, rpeer, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
/* root reduces into receivebuf */
|
||||
if(vrank == 0) {
|
||||
res = NBC_Sched_op((char*)recvbuf+offset, false, (char*)sendbuf+offset, false, (char*)offset, true, thiscount, datatype, op, schedule);
|
||||
} else {
|
||||
res = NBC_Sched_op((char*)offset, true, (char*)sendbuf+offset, false, (char*)offset, true, thiscount, datatype, op, schedule);
|
||||
}
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
}
|
||||
|
||||
/* root does not send */
|
||||
if(vrank != 0) {
|
||||
/* rank p-1 has to send out of sendbuffer :) */
|
||||
if(vrank == p-1) {
|
||||
res = NBC_Sched_send((char*)sendbuf+offset, false, thiscount, datatype, speer, schedule);
|
||||
} else {
|
||||
res = NBC_Sched_send((char*)offset, true, thiscount, datatype, speer, schedule);
|
||||
}
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
/* this barrier here seems awkward but isn't!!!! */
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
}
|
||||
}
|
||||
|
||||
return NBC_OK;
|
||||
}
|
132
ompi/mca/coll/libnbc/nbc_ireduce_scatter.c
Обычный файл
132
ompi/mca/coll/libnbc/nbc_ireduce_scatter.c
Обычный файл
@ -0,0 +1,132 @@
|
||||
#include "nbc.h"
|
||||
|
||||
/* an reduce_csttare schedule can not be cached easily because the contents
|
||||
* ot the recvcounts array may change, so a comparison of the address
|
||||
* would not be sufficient ... we simply do not cache it */
|
||||
|
||||
/* binomial reduce to rank 0 followed by a linear scatter ...
|
||||
*
|
||||
* Algorithm:
|
||||
* pairwise exchange
|
||||
* round r:
|
||||
* grp = rank % 2^r
|
||||
* if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value
|
||||
* if grp == 1: send to rank - 2^(r-1) and exit function
|
||||
*
|
||||
* do this for R=log_2(p) rounds
|
||||
*
|
||||
*/
|
||||
int NBC_Ireduce_scatter(void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, NBC_Handle* handle) {
|
||||
int peer, rank, maxr, p, r, res, count, offset, firstred;
|
||||
MPI_Aint ext;
|
||||
char *redbuf, *sbuf, inplace;
|
||||
NBC_Schedule *schedule;
|
||||
|
||||
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
|
||||
MPI_Type_extent(datatype, &ext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; }
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }
|
||||
|
||||
maxr = (int)ceil((log(p)/LOG2));
|
||||
|
||||
count = 0;
|
||||
for(r=0;r<p;r++) count += recvcounts[r];
|
||||
|
||||
handle->tmpbuf = malloc(ext*count*2);
|
||||
if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; }
|
||||
|
||||
redbuf = ((char*)handle->tmpbuf)+(ext*count);
|
||||
|
||||
/* copy data to redbuf if we only have a single node */
|
||||
if((p==1) && !inplace) {
|
||||
res = NBC_Copy(sendbuf, count, datatype, redbuf, count, datatype, comm);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
firstred = 1;
|
||||
for(r=1; r<=maxr; r++) {
|
||||
if((rank % (1<<r)) == 0) {
|
||||
/* we have to receive this round */
|
||||
peer = rank + (1<<(r-1));
|
||||
if(peer<p) {
|
||||
res = NBC_Sched_recv(0, true, count, datatype, peer, schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
/* we have to wait until we have the data */
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
if(firstred) {
|
||||
/* take reduce data from the sendbuf in the first round -> save copy */
|
||||
res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, sendbuf, false, 0, true, count, datatype, op, schedule);
|
||||
firstred = 0;
|
||||
} else {
|
||||
/* perform the reduce in my local buffer */
|
||||
res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, redbuf-(unsigned long)handle->tmpbuf, true, 0, true, count, datatype, op, schedule);
|
||||
}
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; }
|
||||
/* this cannot be done until handle->tmpbuf is unused :-( */
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
}
|
||||
} else {
|
||||
/* we have to send this round */
|
||||
peer = rank - (1<<(r-1));
|
||||
if(firstred) {
|
||||
/* we have to send the senbuf */
|
||||
res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule);
|
||||
} else {
|
||||
/* we send an already reduced value from redbuf */
|
||||
res = NBC_Sched_send(redbuf-(unsigned long)handle->tmpbuf, true, count, datatype, peer, schedule);
|
||||
}
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
/* leave the game */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
|
||||
/* rank 0 is root and sends - all others receive */
|
||||
if(rank != 0) {
|
||||
res = NBC_Sched_recv(recvbuf, false, recvcounts[rank], datatype, 0, schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
if(rank == 0) {
|
||||
offset = 0;
|
||||
for(r=1;r<p;r++) {
|
||||
offset += recvcounts[r-1];
|
||||
sbuf = ((char *)redbuf) + (offset*ext);
|
||||
/* root sends the right buffer to the right receiver */
|
||||
res = NBC_Sched_send(sbuf-(unsigned long)handle->tmpbuf, true, recvcounts[r], datatype, r, schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
}
|
||||
res = NBC_Sched_copy(redbuf-(unsigned long)handle->tmpbuf, true, recvcounts[0], datatype, recvbuf, false, recvcounts[0], datatype, schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_copy() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
/*NBC_PRINT_SCHED(*schedule);*/
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
/* tmpbuf is freed with the handle */
|
||||
return NBC_OK;
|
||||
}
|
||||
|
||||
|
120
ompi/mca/coll/libnbc/nbc_iscan.c
Обычный файл
120
ompi/mca/coll/libnbc/nbc_iscan.c
Обычный файл
@ -0,0 +1,120 @@
|
||||
#include "nbc.h"
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* tree comparison function for schedule cache */
|
||||
int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param) {
|
||||
|
||||
if( (a->sendbuf == b->sendbuf) &&
|
||||
(a->recvbuf == b->recvbuf) &&
|
||||
(a->count == b->count) &&
|
||||
(a->datatype == b->datatype) &&
|
||||
(a->op == b->op) ) {
|
||||
return 0;
|
||||
}
|
||||
if( a->sendbuf < b->sendbuf ) {
|
||||
return -1;
|
||||
}
|
||||
return +1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* linear iscan
|
||||
* working principle:
|
||||
* 1. each node (but node 0) receives from left neigbor
|
||||
* 2. performs op
|
||||
* 3. all but rank p-1 do sends to it's right neigbor and exits
|
||||
*
|
||||
*/
|
||||
int NBC_Iscan(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, NBC_Handle* handle) {
|
||||
int rank, p, res;
|
||||
MPI_Aint ext;
|
||||
NBC_Schedule *schedule;
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
NBC_Scan_args *args, *found, search;
|
||||
#endif
|
||||
char inplace;
|
||||
|
||||
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(datatype, &ext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
|
||||
handle->tmpbuf = malloc(ext*count);
|
||||
if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; }
|
||||
|
||||
if((rank == 0) && !inplace) {
|
||||
/* copy data to receivebuf */
|
||||
res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* search schedule in communicator specific tree */
|
||||
search.sendbuf=sendbuf;
|
||||
search.recvbuf=recvbuf;
|
||||
search.count=count;
|
||||
search.datatype=datatype;
|
||||
search.op=op;
|
||||
found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_SCAN], &search);
|
||||
if(found == NULL) {
|
||||
#endif
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc()\n"); return res; }
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }
|
||||
|
||||
if(rank != 0) {
|
||||
res = NBC_Sched_recv(0, true, count, datatype, rank-1, schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
/* we have to wait until we have the data */
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
/* perform the reduce in my local buffer */
|
||||
res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; }
|
||||
/* this cannot be done until handle->tmpbuf is unused :-( */
|
||||
res = NBC_Sched_barrier(schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
|
||||
}
|
||||
if(rank != p-1) {
|
||||
res = NBC_Sched_send(recvbuf, false, count, datatype, rank+1, schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* save schedule to tree */
|
||||
args = malloc(sizeof(NBC_Alltoall_args));
|
||||
args->sendbuf=sendbuf;
|
||||
args->recvbuf=recvbuf;
|
||||
args->count=count;
|
||||
args->datatype=datatype;
|
||||
args->op=op;
|
||||
args->schedule=schedule;
|
||||
res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_SCAN], args, args, 0);
|
||||
if(res != 0) printf("error in dict_insert() (%i)\n", res);
|
||||
/* increase number of elements for A2A */
|
||||
if(++handle->comminfo->NBC_Dict_size[NBC_SCAN] > SCHED_DICT_UPPER) {
|
||||
NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_SCAN], &handle->comminfo->NBC_Dict_size[NBC_SCAN]);
|
||||
}
|
||||
} else {
|
||||
/* found schedule */
|
||||
schedule=found->schedule;
|
||||
}
|
||||
#endif
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
/* tmpbuf is freed with the handle */
|
||||
return NBC_OK;
|
||||
}
|
117
ompi/mca/coll/libnbc/nbc_iscatter.c
Обычный файл
117
ompi/mca/coll/libnbc/nbc_iscatter.c
Обычный файл
@ -0,0 +1,117 @@
|
||||
#include "nbc.h"
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* tree comparison function for schedule cache */
|
||||
int NBC_Scatter_args_compare(NBC_Scatter_args *a, NBC_Scatter_args *b, void *param) {
|
||||
|
||||
if( (a->sendbuf == b->sendbuf) &&
|
||||
(a->sendcount == b->sendcount) &&
|
||||
(a->sendtype == b->sendtype) &&
|
||||
(a->recvbuf == b->recvbuf) &&
|
||||
(a->recvcount == b->recvcount) &&
|
||||
(a->recvtype == b->recvtype) &&
|
||||
(a->root == b->root) ) {
|
||||
return 0;
|
||||
}
|
||||
if( a->sendbuf < b->sendbuf ) {
|
||||
return -1;
|
||||
}
|
||||
return +1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* simple linear MPI_Iscatter */
|
||||
int NBC_Iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle) {
|
||||
int rank, p, res, i;
|
||||
MPI_Aint sndext;
|
||||
NBC_Schedule *schedule;
|
||||
char *sbuf, inplace;
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
NBC_Scatter_args *args, *found, search;
|
||||
#endif
|
||||
|
||||
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(sendtype, &sndext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
|
||||
handle->tmpbuf=NULL;
|
||||
|
||||
if((rank == root) && (!inplace)) {
|
||||
sbuf = ((char *)sendbuf) + (rank*sendcount*sndext);
|
||||
/* if I am the root - just copy the message (not for MPI_IN_PLACE) */
|
||||
res = NBC_Copy(sbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
|
||||
}
|
||||
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* search schedule in communicator specific tree */
|
||||
search.sendbuf=sendbuf;
|
||||
search.sendcount=sendcount;
|
||||
search.sendtype=sendtype;
|
||||
search.recvbuf=recvbuf;
|
||||
search.recvcount=recvcount;
|
||||
search.recvtype=recvtype;
|
||||
search.root=root;
|
||||
found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_SCATTER], &search);
|
||||
if(found == NULL) {
|
||||
#endif
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc()\n"); return res; }
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }
|
||||
|
||||
/* receive from root */
|
||||
if(rank != root) {
|
||||
/* recv msg from root */
|
||||
res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
} else {
|
||||
for(i=0;i<p;i++) {
|
||||
sbuf = ((char *)sendbuf) + (i*sendcount*sndext);
|
||||
if(i != root) {
|
||||
/* root sends the right buffer to the right receiver */
|
||||
res = NBC_Sched_send(sbuf, false, sendcount, sendtype, i, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
/* save schedule to tree */
|
||||
args = malloc(sizeof(NBC_Scatter_args));
|
||||
args->sendbuf=sendbuf;
|
||||
args->sendcount=sendcount;
|
||||
args->sendtype=sendtype;
|
||||
args->recvbuf=recvbuf;
|
||||
args->recvcount=recvcount;
|
||||
args->recvtype=recvtype;
|
||||
args->root=root;
|
||||
args->schedule=schedule;
|
||||
res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_SCATTER], args, args, 0);
|
||||
if(res != 0) printf("error in dict_insert() (%i)\n", res);
|
||||
/* increase number of elements for A2A */
|
||||
if(++handle->comminfo->NBC_Dict_size[NBC_SCATTER] > SCHED_DICT_UPPER) {
|
||||
NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_SCATTER], &handle->comminfo->NBC_Dict_size[NBC_SCATTER]);
|
||||
}
|
||||
} else {
|
||||
/* found schedule */
|
||||
schedule=found->schedule;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
62
ompi/mca/coll/libnbc/nbc_iscatterv.c
Обычный файл
62
ompi/mca/coll/libnbc/nbc_iscatterv.c
Обычный файл
@ -0,0 +1,62 @@
|
||||
#include "nbc.h"
|
||||
|
||||
/* an scatterv schedule can not be cached easily because the contents
|
||||
* ot the recvcounts array may change, so a comparison of the address
|
||||
* would not be sufficient ... we simply do not cache it */
|
||||
|
||||
/* simple linear MPI_Iscatterv */
|
||||
int NBC_Iscatterv(void* sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle) {
|
||||
int rank, p, res, i;
|
||||
MPI_Aint sndext;
|
||||
NBC_Schedule *schedule;
|
||||
char *sbuf, inplace;
|
||||
|
||||
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
|
||||
|
||||
res = NBC_Init_handle(handle, comm);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(sendtype, &sndext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
|
||||
schedule = malloc(sizeof(NBC_Schedule));
|
||||
if (NULL == schedule) { printf("Error in malloc()\n"); return res; }
|
||||
|
||||
handle->tmpbuf=NULL;
|
||||
|
||||
res = NBC_Sched_create(schedule);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }
|
||||
|
||||
/* receive from root */
|
||||
if(rank != root) {
|
||||
/* recv msg from root */
|
||||
res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
|
||||
} else {
|
||||
for(i=0;i<p;i++) {
|
||||
sbuf = ((char *)sendbuf) + (displs[i]*sndext);
|
||||
if(i == root) {
|
||||
if(!inplace) {
|
||||
/* if I am the root - just copy the message */
|
||||
res = NBC_Copy(sbuf, sendcounts[i], sendtype, recvbuf, recvcount, recvtype, comm);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
|
||||
}
|
||||
} else {
|
||||
/* root sends the right buffer to the right receiver */
|
||||
res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
res = NBC_Sched_commit(schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
|
||||
|
||||
res = NBC_Start(handle, schedule);
|
||||
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
|
||||
|
||||
return NBC_OK;
|
||||
}
|
561
ompi/mca/coll/libnbc/nbc_op.c
Обычный файл
561
ompi/mca/coll/libnbc/nbc_op.c
Обычный файл
@ -0,0 +1,561 @@
|
||||
#include "nbc.h"
|
||||
|
||||
/****************** THIS FILE is automatically generated *********************
|
||||
* changes will be deleted at the next generation of this file - see nbc_op.c.m4 */
|
||||
|
||||
int NBC_Operation(void *buf3, void *buf1, void *buf2, MPI_Op op, MPI_Datatype type, int count) {
|
||||
int i;
|
||||
|
||||
if(type == MPI_INT) {
|
||||
if(op == MPI_MIN) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((int*)buf1) + i) > *(((int*)buf2) + i)) *(((int*)buf3) + i) = *(((int*)buf2) + i); else *(((int*)buf3) + i) = *(((int*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_MAX) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((int*)buf1) + i) < *(((int*)buf2) + i)) *(((int*)buf3) + i) = *(((int*)buf2) + i); else *(((int*)buf3) + i) = *(((int*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_SUM) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((int*)buf3) + i) = *(((int*)buf1) + i) + *(((int*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_PROD) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((int*)buf3) + i) = *(((int*)buf1) + i) * *(((int*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((int*)buf3) + i) = *(((int*)buf1) + i) && *(((int*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((int*)buf3) + i) = *(((int*)buf1) + i) & *(((int*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((int*)buf3) + i) = *(((int*)buf1) + i) || *(((int*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((int*)buf3) + i) = *(((int*)buf1) + i) | *(((int*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((int*)buf3) + i) = ((*(((int*)buf1) + i) ? 1 : 0) ^ (*(((int*)buf2) + i) ? 1 : 0));
|
||||
}
|
||||
} else if(op == MPI_BXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((int*)buf3) + i) = ((*(((int*)buf1) + i)) ^ (*(((int*)buf2) + i)));
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_LONG) {
|
||||
if(op == MPI_MIN) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((long*)buf1) + i) > *(((long*)buf2) + i)) *(((long*)buf3) + i) = *(((long*)buf2) + i); else *(((long*)buf3) + i) = *(((long*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_MAX) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((long*)buf1) + i) < *(((long*)buf2) + i)) *(((long*)buf3) + i) = *(((long*)buf2) + i); else *(((long*)buf3) + i) = *(((long*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_SUM) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((long*)buf3) + i) = *(((long*)buf1) + i) + *(((long*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_PROD) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((long*)buf3) + i) = *(((long*)buf1) + i) * *(((long*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((long*)buf3) + i) = *(((long*)buf1) + i) && *(((long*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((long*)buf3) + i) = *(((long*)buf1) + i) & *(((long*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((long*)buf3) + i) = *(((long*)buf1) + i) || *(((long*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((long*)buf3) + i) = *(((long*)buf1) + i) | *(((long*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((long*)buf3) + i) = ((*(((long*)buf1) + i) ? 1 : 0) ^ (*(((long*)buf2) + i) ? 1 : 0));
|
||||
}
|
||||
} else if(op == MPI_BXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((long*)buf3) + i) = ((*(((long*)buf1) + i)) ^ (*(((long*)buf2) + i)));
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_SHORT) {
|
||||
if(op == MPI_MIN) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((short*)buf1) + i) > *(((short*)buf2) + i)) *(((short*)buf3) + i) = *(((short*)buf2) + i); else *(((short*)buf3) + i) = *(((short*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_MAX) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((short*)buf1) + i) < *(((short*)buf2) + i)) *(((short*)buf3) + i) = *(((short*)buf2) + i); else *(((short*)buf3) + i) = *(((short*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_SUM) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((short*)buf3) + i) = *(((short*)buf1) + i) + *(((short*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_PROD) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((short*)buf3) + i) = *(((short*)buf1) + i) * *(((short*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((short*)buf3) + i) = *(((short*)buf1) + i) && *(((short*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((short*)buf3) + i) = *(((short*)buf1) + i) & *(((short*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((short*)buf3) + i) = *(((short*)buf1) + i) || *(((short*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((short*)buf3) + i) = *(((short*)buf1) + i) | *(((short*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((short*)buf3) + i) = ((*(((short*)buf1) + i) ? 1 : 0) ^ (*(((short*)buf2) + i) ? 1 : 0));
|
||||
}
|
||||
} else if(op == MPI_BXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((short*)buf3) + i) = ((*(((short*)buf1) + i)) ^ (*(((short*)buf2) + i)));
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_UNSIGNED) {
|
||||
if(op == MPI_MIN) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((unsigned int*)buf1) + i) > *(((unsigned int*)buf2) + i)) *(((unsigned int*)buf3) + i) = *(((unsigned int*)buf2) + i); else *(((unsigned int*)buf3) + i) = *(((unsigned int*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_MAX) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((unsigned int*)buf1) + i) < *(((unsigned int*)buf2) + i)) *(((unsigned int*)buf3) + i) = *(((unsigned int*)buf2) + i); else *(((unsigned int*)buf3) + i) = *(((unsigned int*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_SUM) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned int*)buf3) + i) = *(((unsigned int*)buf1) + i) + *(((unsigned int*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_PROD) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned int*)buf3) + i) = *(((unsigned int*)buf1) + i) * *(((unsigned int*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned int*)buf3) + i) = *(((unsigned int*)buf1) + i) && *(((unsigned int*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned int*)buf3) + i) = *(((unsigned int*)buf1) + i) & *(((unsigned int*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned int*)buf3) + i) = *(((unsigned int*)buf1) + i) || *(((unsigned int*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned int*)buf3) + i) = *(((unsigned int*)buf1) + i) | *(((unsigned int*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned int*)buf3) + i) = ((*(((unsigned int*)buf1) + i) ? 1 : 0) ^ (*(((unsigned int*)buf2) + i) ? 1 : 0));
|
||||
}
|
||||
} else if(op == MPI_BXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned int*)buf3) + i) = ((*(((unsigned int*)buf1) + i)) ^ (*(((unsigned int*)buf2) + i)));
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_UNSIGNED_LONG) {
|
||||
if(op == MPI_MIN) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((unsigned long*)buf1) + i) > *(((unsigned long*)buf2) + i)) *(((unsigned long*)buf3) + i) = *(((unsigned long*)buf2) + i); else *(((unsigned long*)buf3) + i) = *(((unsigned long*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_MAX) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((unsigned long*)buf1) + i) < *(((unsigned long*)buf2) + i)) *(((unsigned long*)buf3) + i) = *(((unsigned long*)buf2) + i); else *(((unsigned long*)buf3) + i) = *(((unsigned long*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_SUM) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned long*)buf3) + i) = *(((unsigned long*)buf1) + i) + *(((unsigned long*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_PROD) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned long*)buf3) + i) = *(((unsigned long*)buf1) + i) * *(((unsigned long*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned long*)buf3) + i) = *(((unsigned long*)buf1) + i) && *(((unsigned long*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned long*)buf3) + i) = *(((unsigned long*)buf1) + i) & *(((unsigned long*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned long*)buf3) + i) = *(((unsigned long*)buf1) + i) || *(((unsigned long*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned long*)buf3) + i) = *(((unsigned long*)buf1) + i) | *(((unsigned long*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned long*)buf3) + i) = ((*(((unsigned long*)buf1) + i) ? 1 : 0) ^ (*(((unsigned long*)buf2) + i) ? 1 : 0));
|
||||
}
|
||||
} else if(op == MPI_BXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned long*)buf3) + i) = ((*(((unsigned long*)buf1) + i)) ^ (*(((unsigned long*)buf2) + i)));
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_UNSIGNED_SHORT) {
|
||||
if(op == MPI_MIN) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((unsigned short*)buf1) + i) > *(((unsigned short*)buf2) + i)) *(((unsigned short*)buf3) + i) = *(((unsigned short*)buf2) + i); else *(((unsigned short*)buf3) + i) = *(((unsigned short*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_MAX) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((unsigned short*)buf1) + i) < *(((unsigned short*)buf2) + i)) *(((unsigned short*)buf3) + i) = *(((unsigned short*)buf2) + i); else *(((unsigned short*)buf3) + i) = *(((unsigned short*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_SUM) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned short*)buf3) + i) = *(((unsigned short*)buf1) + i) + *(((unsigned short*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_PROD) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned short*)buf3) + i) = *(((unsigned short*)buf1) + i) * *(((unsigned short*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned short*)buf3) + i) = *(((unsigned short*)buf1) + i) && *(((unsigned short*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned short*)buf3) + i) = *(((unsigned short*)buf1) + i) & *(((unsigned short*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned short*)buf3) + i) = *(((unsigned short*)buf1) + i) || *(((unsigned short*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned short*)buf3) + i) = *(((unsigned short*)buf1) + i) | *(((unsigned short*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_LXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned short*)buf3) + i) = ((*(((unsigned short*)buf1) + i) ? 1 : 0) ^ (*(((unsigned short*)buf2) + i) ? 1 : 0));
|
||||
}
|
||||
} else if(op == MPI_BXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((unsigned short*)buf3) + i) = ((*(((unsigned short*)buf1) + i)) ^ (*(((unsigned short*)buf2) + i)));
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_FLOAT) {
|
||||
if(op == MPI_MIN) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((float*)buf1) + i) > *(((float*)buf2) + i)) *(((float*)buf3) + i) = *(((float*)buf2) + i); else *(((float*)buf3) + i) = *(((float*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_MAX) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((float*)buf1) + i) < *(((float*)buf2) + i)) *(((float*)buf3) + i) = *(((float*)buf2) + i); else *(((float*)buf3) + i) = *(((float*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_SUM) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((float*)buf3) + i) = *(((float*)buf1) + i) + *(((float*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_PROD) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((float*)buf3) + i) = *(((float*)buf1) + i) * *(((float*)buf2) + i);
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_DOUBLE) {
|
||||
if(op == MPI_MIN) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((double*)buf1) + i) > *(((double*)buf2) + i)) *(((double*)buf3) + i) = *(((double*)buf2) + i); else *(((double*)buf3) + i) = *(((double*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_MAX) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((double*)buf1) + i) < *(((double*)buf2) + i)) *(((double*)buf3) + i) = *(((double*)buf2) + i); else *(((double*)buf3) + i) = *(((double*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_SUM) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((double*)buf3) + i) = *(((double*)buf1) + i) + *(((double*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_PROD) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((double*)buf3) + i) = *(((double*)buf1) + i) * *(((double*)buf2) + i);
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_LONG_DOUBLE) {
|
||||
if(op == MPI_MIN) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((long double*)buf1) + i) > *(((long double*)buf2) + i)) *(((long double*)buf3) + i) = *(((long double*)buf2) + i); else *(((long double*)buf3) + i) = *(((long double*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_MAX) {
|
||||
for(i=0; i<count; i++) {
|
||||
if(*(((long double*)buf1) + i) < *(((long double*)buf2) + i)) *(((long double*)buf3) + i) = *(((long double*)buf2) + i); else *(((long double*)buf3) + i) = *(((long double*)buf1) + i);
|
||||
}
|
||||
} else if(op == MPI_SUM) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((long double*)buf3) + i) = *(((long double*)buf1) + i) + *(((long double*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_PROD) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((long double*)buf3) + i) = *(((long double*)buf1) + i) * *(((long double*)buf2) + i);
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_BYTE) {
|
||||
if(op == MPI_BAND) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((char*)buf3) + i) = *(((char*)buf1) + i) & *(((char*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((char*)buf3) + i) = *(((char*)buf1) + i) | *(((char*)buf2) + i);
|
||||
}
|
||||
} else if(op == MPI_BXOR) {
|
||||
for(i=0; i<count; i++) {
|
||||
*(((char*)buf3) + i) = ((*(((char*)buf1) + i)) ^ (*(((char*)buf2) + i)));
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_FLOAT_INT) {
|
||||
if(op == MPI_MAXLOC) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
float val;
|
||||
int rank;
|
||||
} float_int;
|
||||
float_int *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((float_int*)buf1) + i;
|
||||
ptr2 = ((float_int*)buf2) + i;
|
||||
ptr3 = ((float_int*)buf3) + i;
|
||||
|
||||
if(ptr1->val < ptr2->val) {
|
||||
ptr3->val = ptr2->val; ptr3->rank = ptr2->rank;
|
||||
} else {
|
||||
ptr3->val = ptr1->val; ptr3->rank = ptr1->rank;
|
||||
}
|
||||
}
|
||||
} else if(op == MPI_MINLOC) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
float val;
|
||||
int rank;
|
||||
} float_int;
|
||||
float_int *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((float_int*)buf1) + i;
|
||||
ptr2 = ((float_int*)buf2) + i;
|
||||
ptr3 = ((float_int*)buf3) + i;
|
||||
|
||||
if(ptr1->val > ptr2->val) {
|
||||
ptr3->val = ptr2->val; ptr3->rank = ptr2->rank;
|
||||
} else {
|
||||
ptr3->val = ptr1->val; ptr3->rank = ptr1->rank;
|
||||
}
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_DOUBLE_INT) {
|
||||
if(op == MPI_MAXLOC) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
double val;
|
||||
int rank;
|
||||
} double_int;
|
||||
double_int *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((double_int*)buf1) + i;
|
||||
ptr2 = ((double_int*)buf2) + i;
|
||||
ptr3 = ((double_int*)buf3) + i;
|
||||
|
||||
if(ptr1->val < ptr2->val) {
|
||||
ptr3->val = ptr2->val; ptr3->rank = ptr2->rank;
|
||||
} else {
|
||||
ptr3->val = ptr1->val; ptr3->rank = ptr1->rank;
|
||||
}
|
||||
}
|
||||
} else if(op == MPI_MINLOC) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
double val;
|
||||
int rank;
|
||||
} double_int;
|
||||
double_int *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((double_int*)buf1) + i;
|
||||
ptr2 = ((double_int*)buf2) + i;
|
||||
ptr3 = ((double_int*)buf3) + i;
|
||||
|
||||
if(ptr1->val > ptr2->val) {
|
||||
ptr3->val = ptr2->val; ptr3->rank = ptr2->rank;
|
||||
} else {
|
||||
ptr3->val = ptr1->val; ptr3->rank = ptr1->rank;
|
||||
}
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_LONG_INT) {
|
||||
if(op == MPI_MAXLOC) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
long val;
|
||||
int rank;
|
||||
} long_int;
|
||||
long_int *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((long_int*)buf1) + i;
|
||||
ptr2 = ((long_int*)buf2) + i;
|
||||
ptr3 = ((long_int*)buf3) + i;
|
||||
|
||||
if(ptr1->val < ptr2->val) {
|
||||
ptr3->val = ptr2->val; ptr3->rank = ptr2->rank;
|
||||
} else {
|
||||
ptr3->val = ptr1->val; ptr3->rank = ptr1->rank;
|
||||
}
|
||||
}
|
||||
} else if(op == MPI_MINLOC) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
long val;
|
||||
int rank;
|
||||
} long_int;
|
||||
long_int *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((long_int*)buf1) + i;
|
||||
ptr2 = ((long_int*)buf2) + i;
|
||||
ptr3 = ((long_int*)buf3) + i;
|
||||
|
||||
if(ptr1->val > ptr2->val) {
|
||||
ptr3->val = ptr2->val; ptr3->rank = ptr2->rank;
|
||||
} else {
|
||||
ptr3->val = ptr1->val; ptr3->rank = ptr1->rank;
|
||||
}
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_2INT) {
|
||||
if(op == MPI_MAXLOC) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
int val;
|
||||
int rank;
|
||||
} int_int;
|
||||
int_int *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((int_int*)buf1) + i;
|
||||
ptr2 = ((int_int*)buf2) + i;
|
||||
ptr3 = ((int_int*)buf3) + i;
|
||||
|
||||
if(ptr1->val < ptr2->val) {
|
||||
ptr3->val = ptr2->val; ptr3->rank = ptr2->rank;
|
||||
} else {
|
||||
ptr3->val = ptr1->val; ptr3->rank = ptr1->rank;
|
||||
}
|
||||
}
|
||||
} else if(op == MPI_MINLOC) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
int val;
|
||||
int rank;
|
||||
} int_int;
|
||||
int_int *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((int_int*)buf1) + i;
|
||||
ptr2 = ((int_int*)buf2) + i;
|
||||
ptr3 = ((int_int*)buf3) + i;
|
||||
|
||||
if(ptr1->val > ptr2->val) {
|
||||
ptr3->val = ptr2->val; ptr3->rank = ptr2->rank;
|
||||
} else {
|
||||
ptr3->val = ptr1->val; ptr3->rank = ptr1->rank;
|
||||
}
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_SHORT_INT) {
|
||||
if(op == MPI_MAXLOC) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
short val;
|
||||
int rank;
|
||||
} short_int;
|
||||
short_int *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((short_int*)buf1) + i;
|
||||
ptr2 = ((short_int*)buf2) + i;
|
||||
ptr3 = ((short_int*)buf3) + i;
|
||||
|
||||
if(ptr1->val < ptr2->val) {
|
||||
ptr3->val = ptr2->val; ptr3->rank = ptr2->rank;
|
||||
} else {
|
||||
ptr3->val = ptr1->val; ptr3->rank = ptr1->rank;
|
||||
}
|
||||
}
|
||||
} else if(op == MPI_MINLOC) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
short val;
|
||||
int rank;
|
||||
} short_int;
|
||||
short_int *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((short_int*)buf1) + i;
|
||||
ptr2 = ((short_int*)buf2) + i;
|
||||
ptr3 = ((short_int*)buf3) + i;
|
||||
|
||||
if(ptr1->val > ptr2->val) {
|
||||
ptr3->val = ptr2->val; ptr3->rank = ptr2->rank;
|
||||
} else {
|
||||
ptr3->val = ptr1->val; ptr3->rank = ptr1->rank;
|
||||
}
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else if(type == MPI_LONG_DOUBLE_INT) {
|
||||
if(op == MPI_MAXLOC) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
long double val;
|
||||
int rank;
|
||||
} long_double_int;
|
||||
long_double_int *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((long_double_int*)buf1) + i;
|
||||
ptr2 = ((long_double_int*)buf2) + i;
|
||||
ptr3 = ((long_double_int*)buf3) + i;
|
||||
|
||||
if(ptr1->val < ptr2->val) {
|
||||
ptr3->val = ptr2->val; ptr3->rank = ptr2->rank;
|
||||
} else {
|
||||
ptr3->val = ptr1->val; ptr3->rank = ptr1->rank;
|
||||
}
|
||||
}
|
||||
} else if(op == MPI_MINLOC) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
long double val;
|
||||
int rank;
|
||||
} long_double_int;
|
||||
long_double_int *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((long_double_int*)buf1) + i;
|
||||
ptr2 = ((long_double_int*)buf2) + i;
|
||||
ptr3 = ((long_double_int*)buf3) + i;
|
||||
|
||||
if(ptr1->val > ptr2->val) {
|
||||
ptr3->val = ptr2->val; ptr3->rank = ptr2->rank;
|
||||
} else {
|
||||
ptr3->val = ptr1->val; ptr3->rank = ptr1->rank;
|
||||
}
|
||||
}
|
||||
} else return NBC_OP_NOT_SUPPORTED;
|
||||
} else return NBC_DATATYPE_NOT_SUPPORTED;
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
244
ompi/mca/coll/libnbc/nbc_op.c.m4
Обычный файл
244
ompi/mca/coll/libnbc/nbc_op.c.m4
Обычный файл
@ -0,0 +1,244 @@
|
||||
dnl/*
|
||||
dnl * Copyright (c) 2006 The Trustees of Indiana University and Indiana
|
||||
dnl * University Research and Technology
|
||||
dnl * Corporation. All rights reserved.
|
||||
dnl * Copyright (c) 2006 The Technical University of Chemnitz. All
|
||||
dnl * rights reserved.
|
||||
dnl */
|
||||
dnl
|
||||
dnl this m4 code generate all MPI intrinsic operations
|
||||
dnl every macro is prefixed with m4_ to retain clarity (this means that
|
||||
dnl everything prefixed with m4_ will be replaced by m4!)
|
||||
dnl
|
||||
dnl
|
||||
dnl
|
||||
dnl ########## define all MPI intrinsic Operations and appropriate C code #############
|
||||
define(m4_OP_MPI_MIN, `if(m4_ARG1$1 > m4_ARG2$1) m4_ARG3$1 = m4_ARG2$1; else m4_ARG3$1 = m4_ARG1$1;')dnl
|
||||
define(m4_OP_MPI_MAX, `if(m4_ARG1$1 < m4_ARG2$1) m4_ARG3$1 = m4_ARG2$1; else m4_ARG3$1 = m4_ARG1$1;')dnl
|
||||
define(m4_OP_MPI_SUM, `m4_ARG3$1 = m4_ARG1$1 + m4_ARG2$1;')dnl
|
||||
define(m4_OP_MPI_PROD, `m4_ARG3$1 = m4_ARG1$1 * m4_ARG2$1;')dnl
|
||||
define(m4_OP_MPI_LAND, `m4_ARG3$1 = m4_ARG1$1 && m4_ARG2$1;')dnl
|
||||
define(m4_OP_MPI_BAND, `m4_ARG3$1 = m4_ARG1$1 & m4_ARG2$1;')dnl
|
||||
define(m4_OP_MPI_LOR, `m4_ARG3$1 = m4_ARG1$1 || m4_ARG2$1;')dnl
|
||||
define(m4_OP_MPI_BOR, `m4_ARG3$1 = m4_ARG1$1 | m4_ARG2$1;')dnl
|
||||
define(m4_OP_MPI_LXOR, `m4_ARG3$1 = ((m4_ARG1$1 ? 1 : 0) ^ (m4_ARG2$1 ? 1 : 0));')dnl
|
||||
define(m4_OP_MPI_BXOR, `m4_ARG3$1 = ((m4_ARG1$1) ^ (m4_ARG2$1));')dnl
|
||||
define(m4_OP_MPI_MINLOC, `if(m4_ARG1$1_VAL > m4_ARG2$1_VAL) {
|
||||
m4_ARG3$1_VAL = m4_ARG2$1_VAL; m4_ARG3$1_RANK = m4_ARG2$1_RANK;
|
||||
} else {
|
||||
m4_ARG3$1_VAL = m4_ARG1$1_VAL; m4_ARG3$1_RANK = m4_ARG1$1_RANK;
|
||||
}')dnl
|
||||
define(m4_OP_MPI_MAXLOC, `if(m4_ARG1$1_VAL < m4_ARG2$1_VAL) {
|
||||
m4_ARG3$1_VAL = m4_ARG2$1_VAL; m4_ARG3$1_RANK = m4_ARG2$1_RANK;
|
||||
} else {
|
||||
m4_ARG3$1_VAL = m4_ARG1$1_VAL; m4_ARG3$1_RANK = m4_ARG1$1_RANK;
|
||||
}')dnl
|
||||
dnl
|
||||
dnl ########## define helper macros #################
|
||||
dnl ########## loop-unrolled version -> slows it down :-( ######
|
||||
dnl define(m4_IF, `if(op == $1) {
|
||||
dnl /* loop unrolling - 4 */
|
||||
dnl for(i=0; i<count-3; i=i+4) {
|
||||
dnl m4_CTYPE_$2 val11, val12, val21, val22, val31, val32, val41, val42;
|
||||
dnl
|
||||
dnl val11 = *(((m4_CTYPE_$2*)buf1) + i);
|
||||
dnl val12 = *(((m4_CTYPE_$2*)buf2) + i);
|
||||
dnl val21 = *(((m4_CTYPE_$2*)buf1) + i+1);
|
||||
dnl val22 = *(((m4_CTYPE_$2*)buf2) + i+1);
|
||||
dnl val31 = *(((m4_CTYPE_$2*)buf1) + i+2);
|
||||
dnl val32 = *(((m4_CTYPE_$2*)buf2) + i+2);
|
||||
dnl val41 = *(((m4_CTYPE_$2*)buf1) + i+3);
|
||||
dnl val42 = *(((m4_CTYPE_$2*)buf2) + i+3);
|
||||
dnl
|
||||
dnl define(m4_ARG11_$1$2, val11)dnl
|
||||
dnl define(m4_ARG21_$1$2, val12)dnl
|
||||
dnl define(m4_ARG31_$1$2, val11)dnl
|
||||
dnl m4_OP_$1(1_$1$2)
|
||||
dnl define(m4_ARG12_$1$2, val21)dnl
|
||||
dnl define(m4_ARG22_$1$2, val22)dnl
|
||||
dnl define(m4_ARG32_$1$2, val21)dnl
|
||||
dnl m4_OP_$1(2_$1$2)
|
||||
dnl define(m4_ARG13_$1$2, val31)dnl
|
||||
dnl define(m4_ARG23_$1$2, val32)dnl
|
||||
dnl define(m4_ARG33_$1$2, val31)dnl
|
||||
dnl m4_OP_$1(3_$1$2)
|
||||
dnl define(m4_ARG14_$1$2, val41)dnl
|
||||
dnl define(m4_ARG24_$1$2, val42)dnl
|
||||
dnl define(m4_ARG34_$1$2, val41)dnl
|
||||
dnl m4_OP_$1(4_$1$2)
|
||||
dnl
|
||||
dnl *(((m4_CTYPE_$2*)buf3) + i) = val11;
|
||||
dnl *(((m4_CTYPE_$2*)buf3) + i+1) = val21;
|
||||
dnl *(((m4_CTYPE_$2*)buf3) + i+2) = val31;
|
||||
dnl *(((m4_CTYPE_$2*)buf3) + i+3) = val41;
|
||||
dnl }
|
||||
dnl for(i=i+4;i<count;i++) {
|
||||
dnl m4_CTYPE_$2 val11, val12;
|
||||
dnl
|
||||
dnl val11 = *(((m4_CTYPE_$2*)buf1) + i);
|
||||
dnl val12 = *(((m4_CTYPE_$2*)buf2) + i);
|
||||
dnl
|
||||
dnl define(m4_ARG15_$1$2, val11)dnl
|
||||
dnl define(m4_ARG25_$1$2, val12)dnl
|
||||
dnl define(m4_ARG35_$1$2, val11)dnl
|
||||
dnl m4_OP_$1(5_$1$2)
|
||||
dnl
|
||||
dnl *(((m4_CTYPE_$2*)buf3) + i) = val11;
|
||||
dnl }
|
||||
dnl }')dnl
|
||||
dnl ##########################################################
|
||||
dnl ########### THIS is faster as the unrolled code :-(( #####
|
||||
define(m4_IF, `if(op == $1) {
|
||||
for(i=0; i<count; i++) {
|
||||
define(m4_ARG1_$2, `*(((m4_CTYPE_$2*)buf1) + i)')dnl
|
||||
define(m4_ARG2_$2, `*(((m4_CTYPE_$2*)buf2) + i)')dnl
|
||||
define(m4_ARG3_$2, `*(((m4_CTYPE_$2*)buf3) + i)')dnl
|
||||
m4_OP_$1(_$2)
|
||||
}
|
||||
}')dnl
|
||||
dnl ###############################################
|
||||
define(m4_LOCIF, `if(op == $1) {
|
||||
for(i=0; i<count; i++) {
|
||||
typedef struct {
|
||||
m4_CTYPE1_$2 val;
|
||||
m4_CTYPE2_$2 rank;
|
||||
} m4_CTYPE3_$2;
|
||||
m4_CTYPE3_$2 *ptr1, *ptr2, *ptr3;
|
||||
|
||||
ptr1 = ((m4_CTYPE3_$2*)buf1) + i;
|
||||
ptr2 = ((m4_CTYPE3_$2*)buf2) + i;
|
||||
ptr3 = ((m4_CTYPE3_$2*)buf3) + i;
|
||||
|
||||
define(m4_ARG1_VAL, ptr1->val)dnl
|
||||
define(m4_ARG2_VAL, ptr2->val)dnl
|
||||
define(m4_ARG3_VAL, ptr3->val)dnl
|
||||
define(m4_ARG1_RANK, ptr1->rank)dnl
|
||||
define(m4_ARG2_RANK, ptr2->rank)dnl
|
||||
define(m4_ARG3_RANK, ptr3->rank)dnl
|
||||
m4_OP_$1
|
||||
}
|
||||
}')dnl
|
||||
dnl ##########################################################
|
||||
define(m4_TYPE, `if(type == $1) {
|
||||
m4_OPTYPE_$1($1)
|
||||
}')dnl
|
||||
dnl ########## define possible operations for each type
|
||||
dnl
|
||||
dnl
|
||||
dnl ####### MPI_INT ########
|
||||
define(m4_OPTYPE_MPI_INT, `define(m4_CTYPE_$1, `int')dnl
|
||||
m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl
|
||||
m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl
|
||||
m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl
|
||||
m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_LONG ########
|
||||
define(m4_OPTYPE_MPI_LONG, `define(m4_CTYPE_$1, `long')dnl
|
||||
m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl
|
||||
m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl
|
||||
m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl
|
||||
m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_SHORT ########
|
||||
define(m4_OPTYPE_MPI_SHORT, `define(m4_CTYPE_$1, `short')dnl
|
||||
m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl
|
||||
m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl
|
||||
m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl
|
||||
m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_UNSIGNED ########
|
||||
define(m4_OPTYPE_MPI_UNSIGNED, `define(m4_CTYPE_$1, `unsigned int')dnl
|
||||
m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl
|
||||
m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl
|
||||
m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl
|
||||
m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_UNSIGNED_LONG ########
|
||||
define(m4_OPTYPE_MPI_UNSIGNED_LONG, `define(m4_CTYPE_$1, `unsigned long')dnl
|
||||
m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl
|
||||
m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl
|
||||
m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl
|
||||
m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_UNSIGNED_SHORT ########
|
||||
define(m4_OPTYPE_MPI_UNSIGNED_SHORT, `define(m4_CTYPE_$1, `unsigned short')dnl
|
||||
m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl
|
||||
m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl
|
||||
m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl
|
||||
m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_FLOAT ########
|
||||
define(m4_OPTYPE_MPI_FLOAT, `define(m4_CTYPE_$1, `float')dnl
|
||||
m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl
|
||||
m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_DOUBLE ########
|
||||
define(m4_OPTYPE_MPI_DOUBLE, `define(m4_CTYPE_$1, `double')dnl
|
||||
m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl
|
||||
m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_LONG_DOUBLE ########
|
||||
define(m4_OPTYPE_MPI_LONG_DOUBLE, `define(m4_CTYPE_$1, `long double')dnl
|
||||
m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl
|
||||
m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_BYTE ########
|
||||
define(m4_OPTYPE_MPI_BYTE, `define(m4_CTYPE_$1, `char')dnl
|
||||
m4_IF(MPI_BAND, $1) else m4_IF(MPI_BOR, $1) else dnl
|
||||
m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_FLOAT_INT ########
|
||||
define(m4_OPTYPE_MPI_FLOAT_INT, `define(m4_CTYPE1_$1, `float')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `float_int')dnl
|
||||
m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_DOUBLE_INT ########
|
||||
define(m4_OPTYPE_MPI_DOUBLE_INT, `define(m4_CTYPE1_$1, `double')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `double_int')dnl
|
||||
m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_LONG_INT ########
|
||||
define(m4_OPTYPE_MPI_LONG_INT, `define(m4_CTYPE1_$1, `long')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `long_int')dnl
|
||||
m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_2INT ########
|
||||
define(m4_OPTYPE_MPI_2INT, `define(m4_CTYPE1_$1, `int')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `int_int')dnl
|
||||
m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_SHORT_INT ########
|
||||
define(m4_OPTYPE_MPI_SHORT_INT, `define(m4_CTYPE1_$1, `short')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `short_int')dnl
|
||||
m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### MPI_LONG_DOUBLE_INT ########
|
||||
define(m4_OPTYPE_MPI_LONG_DOUBLE_INT, `define(m4_CTYPE1_$1, `long double')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `long_double_int')dnl
|
||||
m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl
|
||||
dnl
|
||||
dnl ####### begin the real program :-) #########
|
||||
dnl
|
||||
#include "nbc.h"
|
||||
|
||||
/****************** THIS FILE is automatically generated *********************
|
||||
* changes will be deleted at the next generation of this file - see nbc_op.c.m4 */
|
||||
|
||||
int NBC_Operation(void *buf3, void *buf1, void *buf2, MPI_Op op, MPI_Datatype type, int count) {
|
||||
int i;
|
||||
|
||||
m4_TYPE(MPI_INT) else dnl
|
||||
m4_TYPE(MPI_LONG) else dnl
|
||||
m4_TYPE(MPI_SHORT) else dnl
|
||||
m4_TYPE(MPI_UNSIGNED) else dnl
|
||||
m4_TYPE(MPI_UNSIGNED_LONG) else dnl
|
||||
m4_TYPE(MPI_UNSIGNED_SHORT) else dnl
|
||||
m4_TYPE(MPI_FLOAT) else dnl
|
||||
m4_TYPE(MPI_DOUBLE) else dnl
|
||||
m4_TYPE(MPI_LONG_DOUBLE) else dnl
|
||||
m4_TYPE(MPI_BYTE) else dnl
|
||||
m4_TYPE(MPI_FLOAT_INT) else dnl
|
||||
m4_TYPE(MPI_DOUBLE_INT) else dnl
|
||||
m4_TYPE(MPI_LONG_INT) else dnl
|
||||
m4_TYPE(MPI_2INT) else dnl
|
||||
m4_TYPE(MPI_SHORT_INT) else dnl
|
||||
m4_TYPE(MPI_LONG_DOUBLE_INT) else dnl
|
||||
return NBC_DATATYPE_NOT_SUPPORTED;
|
||||
|
||||
return NBC_OK;
|
||||
}
|
||||
|
448
ompi/mca/coll/libnbc/ompi_component.c
Обычный файл
448
ompi/mca/coll/libnbc/ompi_component.c
Обычный файл
@ -0,0 +1,448 @@
|
||||
#include <stdio.h>
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/datatype/datatype.h"
|
||||
#include "ompi_config.h"
|
||||
#include "mpi.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/mca/coll/base/base.h"
|
||||
|
||||
#include "ompi_component.h"
|
||||
|
||||
#include "nbc.h"
|
||||
|
||||
int mca_coll_libnbc_allgather_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype, void *rbuf,
|
||||
int rcount, struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Iallgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_allgatherv_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void * rbuf, int *rcounts, int *disps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Iallgatherv(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_allreduce_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Iallreduce(sbuf, rbuf, count, dtype, op, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_alltoall_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Ialltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_alltoallv_intra(void *sbuf, int *scounts, int *sdisps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts, int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Ialltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_alltoallw_intra(void *sbuf, int *scounts, int *sdisps,
|
||||
struct ompi_datatype_t **sdtypes,
|
||||
void *rbuf, int *rcounts, int *rdisps,
|
||||
struct ompi_datatype_t **rdtypes,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
|
||||
/* not implemented in libnbc yet ...
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Iallgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_barrier_intra(struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
/*printf("calling barrier ...\n");*/
|
||||
res = NBC_Ibarrier(comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_bcast_intra(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype, int root,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Ibcast(buff, count, datatype, root, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_exscan_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
/* not implemented yet ...
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Iallgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
*/
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_gather_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root, struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Igather(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_gatherv_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts, int *disps,
|
||||
struct ompi_datatype_t *rdtype, int root,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Igatherv(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, root, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_reduce_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
int root, struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Ireduce(sbuf, rbuf, count, dtype, op, root, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Ireduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_scan_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Iscan(sbuf, rbuf, count, dtype, op, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_scatter_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Iscatter(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
int mca_coll_libnbc_scatterv_intra(void *sbuf, int *scounts,
|
||||
int *disps, struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype, int root,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
NBC_Handle handle;
|
||||
int res;
|
||||
|
||||
res = NBC_Iscatterv(sbuf, scounts, disps, sdtype, rbuf, rcount, rdtype, root, comm, &handle);
|
||||
if(res != NBC_OK) return res;
|
||||
res = NBC_Wait(&handle);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Public string showing the coll ompi_libnbc component version number
|
||||
*/
|
||||
const char *mca_coll_libnbc_component_version_string =
|
||||
"Open MPI libnbc collective MCA component version " OMPI_VERSION;
|
||||
|
||||
/*
|
||||
* Global variable
|
||||
*/
|
||||
int mca_coll_libnbc_priority_param = -1;
|
||||
|
||||
/*
|
||||
* Local function
|
||||
*/
|
||||
static int libnbc_open(void);
|
||||
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointers to our public functions in it
|
||||
*/
|
||||
|
||||
const mca_coll_base_component_1_0_0_t mca_coll_libnbc_component = {
|
||||
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itlibnbc */
|
||||
|
||||
{
|
||||
/* Indicate that we are a coll v1.0.0 component (which also
|
||||
implies a specific MCA version) */
|
||||
|
||||
MCA_COLL_BASE_VERSION_1_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
|
||||
"libnbc",
|
||||
OMPI_MAJOR_VERSION,
|
||||
OMPI_MINOR_VERSION,
|
||||
OMPI_RELEASE_VERSION,
|
||||
|
||||
/* Component open and close functions */
|
||||
|
||||
libnbc_open,
|
||||
NULL
|
||||
},
|
||||
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
|
||||
true
|
||||
},
|
||||
|
||||
/* Initialization / querying functions */
|
||||
|
||||
mca_coll_libnbc_init_query,
|
||||
mca_coll_libnbc_comm_query,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
||||
static int libnbc_open(void)
|
||||
{
|
||||
/* We'll always be picked if there's only one process in the
|
||||
communicator */
|
||||
|
||||
mca_coll_libnbc_priority_param =
|
||||
mca_base_param_register_int("coll", "libnbc", "priority", NULL, 1);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Module
|
||||
*/
|
||||
static const mca_coll_base_module_1_0_0_t module = {
|
||||
|
||||
/* Initialization / finalization functions */
|
||||
|
||||
mca_coll_libnbc_module_init,
|
||||
mca_coll_libnbc_module_finalize,
|
||||
|
||||
/* Collective function pointers */
|
||||
|
||||
mca_coll_libnbc_allgather_intra,
|
||||
mca_coll_libnbc_allgatherv_intra,
|
||||
mca_coll_libnbc_allreduce_intra,
|
||||
mca_coll_libnbc_alltoall_intra,
|
||||
mca_coll_libnbc_alltoallv_intra,
|
||||
NULL, /* not implemented yet - mca_coll_libnbc_alltoallw_intra, */
|
||||
mca_coll_libnbc_barrier_intra,
|
||||
mca_coll_libnbc_bcast_intra,
|
||||
NULL, /* not implemented yet - mca_coll_libnbc_exscan_intra, */
|
||||
mca_coll_libnbc_gather_intra,
|
||||
mca_coll_libnbc_gatherv_intra,
|
||||
mca_coll_libnbc_reduce_intra,
|
||||
mca_coll_libnbc_reduce_scatter_intra,
|
||||
mca_coll_libnbc_scan_intra,
|
||||
mca_coll_libnbc_scatter_intra,
|
||||
mca_coll_libnbc_scatterv_intra
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Initial query function that is invoked during MPI_INIT, allowing
|
||||
* this module to indicate what level of thread support it provides.
|
||||
*/
|
||||
int mca_coll_libnbc_init_query(bool enable_progress_threads,
|
||||
bool enable_mpi_threads)
|
||||
{
|
||||
/* Nothing to do */
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Invoked when there's a new communicator that has been created.
|
||||
* Look at the communicator and decide which set of functions and
|
||||
* priority we want to return.
|
||||
*/
|
||||
const mca_coll_base_module_1_0_0_t *
|
||||
mca_coll_libnbc_comm_query(struct ompi_communicator_t *comm, int *priority,
|
||||
struct mca_coll_base_comm_t **data)
|
||||
{
|
||||
/* We only work on intracommunicators */
|
||||
|
||||
if (!OMPI_COMM_IS_INTER(comm)) {
|
||||
if (OMPI_SUCCESS != mca_base_param_lookup_int(mca_coll_libnbc_priority_param, priority)) {
|
||||
return NULL;
|
||||
}
|
||||
/* printf("returning prio: %i\n", *priority); */
|
||||
|
||||
return &module;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Init module on the communicator
|
||||
*/
|
||||
const struct mca_coll_base_module_1_0_0_t* mca_coll_libnbc_module_init(struct ompi_communicator_t *comm)
|
||||
{
|
||||
|
||||
comm->c_coll_selected_data = (void*)NBC_Init_comm(comm);
|
||||
if(NULL == comm->c_coll_selected_data) return NULL;
|
||||
/* printf("communicator initialized comminfo: %lu:)\n", (unsigned long)comm->c_coll_selected_data); */
|
||||
|
||||
return &module;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Finalize module on the communicator
|
||||
*/
|
||||
int mca_coll_libnbc_module_finalize(struct ompi_communicator_t *comm)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
117
ompi/mca/coll/libnbc/ompi_component.h
Обычный файл
117
ompi/mca/coll/libnbc/ompi_component.h
Обычный файл
@ -0,0 +1,117 @@
|
||||
#ifndef MCA_COLL_libnbc_EXPORT_H
|
||||
#define MCA_COLL_libnbc_EXPORT_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
|
||||
#include "nbc.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Globally exported variable
|
||||
*/
|
||||
|
||||
extern const mca_coll_base_component_1_0_0_t mca_coll_libnbc_component;
|
||||
extern int mca_coll_libnbc_priority_param;
|
||||
|
||||
|
||||
/*
|
||||
* coll API functions
|
||||
*/
|
||||
|
||||
|
||||
/* API functions */
|
||||
|
||||
int mca_coll_libnbc_init_query(bool enable_progress_threads,
|
||||
bool enable_mpi_threads);
|
||||
const struct mca_coll_base_module_1_0_0_t *
|
||||
mca_coll_libnbc_comm_query(struct ompi_communicator_t *comm, int *priority,
|
||||
struct mca_coll_base_comm_t **data);
|
||||
|
||||
const struct mca_coll_base_module_1_0_0_t *
|
||||
mca_coll_libnbc_module_init(struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_module_finalize(struct ompi_communicator_t *comm);
|
||||
|
||||
int mca_coll_libnbc_allgather_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_allgatherv_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void * rbuf, int *rcounts, int *disps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_allreduce_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_alltoall_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_alltoallv_intra(void *sbuf, int *scounts, int *sdisps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts, int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_alltoallw_intra(void *sbuf, int *scounts, int *sdisps,
|
||||
struct ompi_datatype_t **sdtypes,
|
||||
void *rbuf, int *rcounts, int *rdisps,
|
||||
struct ompi_datatype_t **rdtypes,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_barrier_intra(struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_bcast_intra(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_exscan_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_gather_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype, void *rbuf,
|
||||
int rcount, struct ompi_datatype_t *rdtype,
|
||||
int root, struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_gatherv_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype, void *rbuf,
|
||||
int *rcounts, int *disps,
|
||||
struct ompi_datatype_t *rdtype, int root,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_reduce_intra(void *sbuf, void* rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_reduce_scatter_intra(void *sbuf, void *rbuf,
|
||||
int *rcounts,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_scan_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_scatter_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype, void *rbuf,
|
||||
int rcount, struct ompi_datatype_t *rdtype,
|
||||
int root, struct ompi_communicator_t *comm);
|
||||
int mca_coll_libnbc_scatterv_intra(void *sbuf, int *scounts, int *disps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype, int root,
|
||||
struct ompi_communicator_t *comm);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif /* MCA_COLL_libnbc_EXPORT_H */
|
Загрузка…
x
Ссылка в новой задаче
Block a user