From d37a25a2d0e8df56afe0afd5fdf85abc691eff09 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Tue, 22 Jul 2008 00:57:23 +0000 Subject: [PATCH] Remove per http://www.open-mpi.org/community/lists/devel/2008/07/4386.php This commit was SVN r18972. --- ompi/mca/coll/libnbc/.ompi_ignore | 0 ompi/mca/coll/libnbc/COPYRIGHT | 7 - ompi/mca/coll/libnbc/LICENSE | 40 - ompi/mca/coll/libnbc/Makefile.am | 64 -- ompi/mca/coll/libnbc/README | 17 - ompi/mca/coll/libnbc/README.dist | 12 - ompi/mca/coll/libnbc/c2f.m4 | 12 - ompi/mca/coll/libnbc/configure.params | 22 - ompi/mca/coll/libnbc/dict.c | 106 -- ompi/mca/coll/libnbc/dict.h | 140 --- ompi/mca/coll/libnbc/dict_private.h | 84 -- ompi/mca/coll/libnbc/hb_tree.c | 906 ---------------- ompi/mca/coll/libnbc/hb_tree.h | 64 -- ompi/mca/coll/libnbc/ib.c | 1142 -------------------- ompi/mca/coll/libnbc/ib.h | 119 -- ompi/mca/coll/libnbc/ib_main.c | 104 -- ompi/mca/coll/libnbc/main.c | 294 ----- ompi/mca/coll/libnbc/nbc.c | 752 ------------- ompi/mca/coll/libnbc/nbc.h | 657 ----------- ompi/mca/coll/libnbc/nbc_iallgather.c | 202 ---- ompi/mca/coll/libnbc/nbc_iallgatherv.c | 70 -- ompi/mca/coll/libnbc/nbc_iallreduce.c | 332 ------ ompi/mca/coll/libnbc/nbc_ialltoall.c | 300 ----- ompi/mca/coll/libnbc/nbc_ialltoallv.c | 67 -- ompi/mca/coll/libnbc/nbc_ibarrier.c | 75 -- ompi/mca/coll/libnbc/nbc_ibcast.c | 234 ---- ompi/mca/coll/libnbc/nbc_igather.c | 117 -- ompi/mca/coll/libnbc/nbc_igatherv.c | 62 -- ompi/mca/coll/libnbc/nbc_ireduce.c | 277 ----- ompi/mca/coll/libnbc/nbc_ireduce_scatter.c | 132 --- ompi/mca/coll/libnbc/nbc_iscan.c | 120 -- ompi/mca/coll/libnbc/nbc_iscatter.c | 117 -- ompi/mca/coll/libnbc/nbc_iscatterv.c | 62 -- ompi/mca/coll/libnbc/nbc_op.c | 561 ---------- ompi/mca/coll/libnbc/nbc_op.c.m4 | 244 ----- ompi/mca/coll/libnbc/ompi_component.c | 447 -------- ompi/mca/coll/libnbc/ompi_component.h | 117 -- 37 files changed, 8078 deletions(-) delete mode 100644 ompi/mca/coll/libnbc/.ompi_ignore delete mode 100644 ompi/mca/coll/libnbc/COPYRIGHT delete mode 100644 ompi/mca/coll/libnbc/LICENSE delete mode 100644 ompi/mca/coll/libnbc/Makefile.am delete mode 100644 ompi/mca/coll/libnbc/README delete mode 100644 ompi/mca/coll/libnbc/README.dist delete mode 100644 ompi/mca/coll/libnbc/c2f.m4 delete mode 100644 ompi/mca/coll/libnbc/configure.params delete mode 100644 ompi/mca/coll/libnbc/dict.c delete mode 100644 ompi/mca/coll/libnbc/dict.h delete mode 100644 ompi/mca/coll/libnbc/dict_private.h delete mode 100644 ompi/mca/coll/libnbc/hb_tree.c delete mode 100644 ompi/mca/coll/libnbc/hb_tree.h delete mode 100644 ompi/mca/coll/libnbc/ib.c delete mode 100644 ompi/mca/coll/libnbc/ib.h delete mode 100644 ompi/mca/coll/libnbc/ib_main.c delete mode 100644 ompi/mca/coll/libnbc/main.c delete mode 100644 ompi/mca/coll/libnbc/nbc.c delete mode 100644 ompi/mca/coll/libnbc/nbc.h delete mode 100644 ompi/mca/coll/libnbc/nbc_iallgather.c delete mode 100644 ompi/mca/coll/libnbc/nbc_iallgatherv.c delete mode 100644 ompi/mca/coll/libnbc/nbc_iallreduce.c delete mode 100644 ompi/mca/coll/libnbc/nbc_ialltoall.c delete mode 100644 ompi/mca/coll/libnbc/nbc_ialltoallv.c delete mode 100644 ompi/mca/coll/libnbc/nbc_ibarrier.c delete mode 100644 ompi/mca/coll/libnbc/nbc_ibcast.c delete mode 100644 ompi/mca/coll/libnbc/nbc_igather.c delete mode 100644 ompi/mca/coll/libnbc/nbc_igatherv.c delete mode 100644 ompi/mca/coll/libnbc/nbc_ireduce.c delete mode 100644 ompi/mca/coll/libnbc/nbc_ireduce_scatter.c delete mode 100644 ompi/mca/coll/libnbc/nbc_iscan.c delete mode 100644 ompi/mca/coll/libnbc/nbc_iscatter.c delete mode 100644 ompi/mca/coll/libnbc/nbc_iscatterv.c delete mode 100644 ompi/mca/coll/libnbc/nbc_op.c delete mode 100644 ompi/mca/coll/libnbc/nbc_op.c.m4 delete mode 100644 ompi/mca/coll/libnbc/ompi_component.c delete mode 100644 ompi/mca/coll/libnbc/ompi_component.h diff --git a/ompi/mca/coll/libnbc/.ompi_ignore b/ompi/mca/coll/libnbc/.ompi_ignore deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ompi/mca/coll/libnbc/COPYRIGHT b/ompi/mca/coll/libnbc/COPYRIGHT deleted file mode 100644 index 6d24e8449a..0000000000 --- a/ompi/mca/coll/libnbc/COPYRIGHT +++ /dev/null @@ -1,7 +0,0 @@ -/* - * Copyright (c) 2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2006 The Technical University of Chemnitz. All - * rights reserved. - */ diff --git a/ompi/mca/coll/libnbc/LICENSE b/ompi/mca/coll/libnbc/LICENSE deleted file mode 100644 index 45a1cc7b91..0000000000 --- a/ompi/mca/coll/libnbc/LICENSE +++ /dev/null @@ -1,40 +0,0 @@ -Copyright (c) 2006 The Trustees of Indiana University and Indiana - University Research and Technology Corporation. All - rights reserved. -Copyright (c) 2006 The Technical University of Chemnitz. All rights - reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -- Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer listed - in this license in the documentation and/or other materials - provided with the distribution. - -- Neither the name of the copyright holders nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -The copyright holders provide no reassurances that the source code -provided does not infringe any patent, copyright, or any other -intellectual property rights of third parties. The copyright holders -disclaim any liability to any recipient for claims brought against -recipient by any third party for infringement of that parties -intellectual property rights. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/ompi/mca/coll/libnbc/Makefile.am b/ompi/mca/coll/libnbc/Makefile.am deleted file mode 100644 index fd3a5900ed..0000000000 --- a/ompi/mca/coll/libnbc/Makefile.am +++ /dev/null @@ -1,64 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - dict.c \ - hb_tree.c \ - nbc.c \ - nbc_iallgather.c \ - nbc_iallgatherv.c \ - nbc_iallreduce.c \ - nbc_ialltoall.c \ - nbc_ialltoallv.c \ - nbc_ibarrier.c \ - nbc_ibcast.c \ - nbc_igather.c \ - nbc_igatherv.c \ - nbc_ireduce.c \ - nbc_ireduce_scatter.c \ - nbc_iscan.c \ - nbc_iscatter.c \ - nbc_iscatterv.c \ - nbc_op.c \ - ompi_component.c \ - ompi_component.h \ - dict.h \ - dict_private.h \ - hb_tree.h \ - nbc.h - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if OMPI_BUILD_coll_libnbc_DSO -component_noinst = -component_install = mca_coll_libnbc.la -else -component_noinst = libmca_coll_libnbc.la -component_install = -endif - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_coll_libnbc_la_SOURCES = $(sources) -mca_coll_libnbc_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_coll_libnbc_la_SOURCES =$(sources) -libmca_coll_libnbc_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/coll/libnbc/README b/ompi/mca/coll/libnbc/README deleted file mode 100644 index 87cf8e83fc..0000000000 --- a/ompi/mca/coll/libnbc/README +++ /dev/null @@ -1,17 +0,0 @@ -* TODO: -- support MPI-2 collectives -- support MPI-2 Features (MPI_IN_PLACE) -- support MPI-2 Requests (really? -> I don't think so :) - -* Missing for MPI-1: -- FORTRAN Bindings -- add user defined operations (coll9, coll10, coll11, longuser) --- how do we ensure that we do not collide with Intrinsic Operations if - we issue NBC_Ops??? --- we cannot issue NBC_Ops ... we need to issue MPI_Ops :-(. --- hmm, we could simply wrap it and save the user defined op in a - list (hash) and search this every time we get called - --> cool idea, let's do that ... - -* No Idea: -- what is wrong with nbcoll (does not work with Open MPI blocking colls) diff --git a/ompi/mca/coll/libnbc/README.dist b/ompi/mca/coll/libnbc/README.dist deleted file mode 100644 index e2f781e96f..0000000000 --- a/ompi/mca/coll/libnbc/README.dist +++ /dev/null @@ -1,12 +0,0 @@ -Building LibNBC: -- set the right MPI-c Compilerwrapper in the Makefile (default: mpicc) -- build libnbc.a with 'make' - -Linking Programs with LibNBC: -- import nbc.h -- link with -lnbc - -Further Information and examples: -- http://www.unixer.de/NBC - -SVN Version: r60 diff --git a/ompi/mca/coll/libnbc/c2f.m4 b/ompi/mca/coll/libnbc/c2f.m4 deleted file mode 100644 index c8709c7348..0000000000 --- a/ompi/mca/coll/libnbc/c2f.m4 +++ /dev/null @@ -1,12 +0,0 @@ -dnl/* -dnl * Copyright (c) 2006 The Trustees of Indiana University and Indiana -dnl * University Research and Technology -dnl * Corporation. All rights reserved. -dnl * Copyright (c) 2006 The Technical University of Chemnitz. All -dnl * rights reserved. -dnl */ -dnl -define(MPI_Comm, int)dnl -define(MPI_Op, int)dnl -define(MPI_Datatype, int)dnl -define(MPI_Request, int)dnl diff --git a/ompi/mca/coll/libnbc/configure.params b/ompi/mca/coll/libnbc/configure.params deleted file mode 100644 index e35a45dd44..0000000000 --- a/ompi/mca/coll/libnbc/configure.params +++ /dev/null @@ -1,22 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2007 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -PARAM_CONFIG_FILES=Makefile diff --git a/ompi/mca/coll/libnbc/dict.c b/ompi/mca/coll/libnbc/dict.c deleted file mode 100644 index d4f8664602..0000000000 --- a/ompi/mca/coll/libnbc/dict.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * dict.c - * - * Implementation of generic dictionary routines. - * Copyright (C) 2001-2004 Farooq Mela. - * - * $Id: dict.c,v 1.7 2001/11/25 06:00:49 farooq Exp farooq $ - */ - -#include - -#include "dict.h" -#include "dict_private.h" - -dict_malloc_func _dict_malloc = malloc; -dict_free_func _dict_free = free; - -dict_malloc_func -dict_set_malloc(dict_malloc_func func) -{ - dict_malloc_func old = _dict_malloc; - _dict_malloc = func ? func : malloc; - return old; -} - -dict_free_func -dict_set_free(dict_free_func func) -{ - dict_free_func old = _dict_free; - _dict_free = func ? func : free; - return old; -} - -/* - * In comparing, we cannot simply subtract because that might result in signed - * overflow. - */ -int -dict_int_cmp(const void *k1, const void *k2) -{ - const int *a = k1, *b = k2; - - return (*a < *b) ? -1 : (*a > *b) ? +1 : 0; -} - -int -dict_uint_cmp(const void *k1, const void *k2) -{ - const unsigned int *a = k1, *b = k2; - - return (*a < *b) ? -1 : (*a > *b) ? +1 : 0; -} - -int -dict_long_cmp(const void *k1, const void *k2) -{ - const long *a = k1, *b = k2; - - return (*a < *b) ? -1 : (*a > *b) ? +1 : 0; -} - -int -dict_ulong_cmp(const void *k1, const void *k2) -{ - const unsigned long *a = k1, *b = k2; - - return (*a < *b) ? -1 : (*a > *b) ? +1 : 0; -} - -int -dict_ptr_cmp(const void *k1, const void *k2) -{ - return (k1 > k2) - (k1 < k2); -} - -int -dict_str_cmp(const void *k1, const void *k2) -{ - const char *a = k1, *b = k2; - char p, q; - - for (;;) { - p = *a++; q = *b++; - if (p == 0 || p != q) - break; - } - return (p > q) - (p < q); -} - -void -dict_destroy(dict *dct, int del) -{ - ASSERT(dct != NULL); - - dct->_destroy(dct->_object, del); - FREE(dct); -} - -void -dict_itor_destroy(dict_itor *itor) -{ - ASSERT(itor != NULL); - - itor->_destroy(itor->_itor); - FREE(itor); -} diff --git a/ompi/mca/coll/libnbc/dict.h b/ompi/mca/coll/libnbc/dict.h deleted file mode 100644 index 4680b2bc2c..0000000000 --- a/ompi/mca/coll/libnbc/dict.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * dict.h - * - * Interface for generic access to dictionary library. - * Copyright (C) 2001-2004 Farooq Mela. - * - * $Id: dict.h,v 1.6 2001/11/14 05:21:10 farooq Exp farooq $ - */ - -#ifndef _DICT_H_ -#define _DICT_H_ - -#include - -#define DICT_VERSION_MAJOR 0 -#define DICT_VERSION_MINOR 2 -#define DICT_VERSION_PATCH 1 - -#ifndef __P -# if defined(__STDC__) || defined(__cplusplus) || defined(c_plusplus) || \ - defined(_MSC_VER) -# define __P(x) x -# else /* !__STDC__ && !__cplusplus && !c_plusplus && !_MSC_VER */ -# define __P(x) -# endif -#endif /* !__P */ - -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE (!FALSE) -#endif - -#if defined(__cplusplus) || defined(c_plusplus) -# define BEGIN_DECL extern "C" { -# define END_DECL } -#else -# define BEGIN_DECL -# define END_DECL -#endif - -BEGIN_DECL - -typedef void *(*dict_malloc_func)(size_t); -typedef void (*dict_free_func)(void *); - -dict_malloc_func dict_set_malloc __P((dict_malloc_func func)); -dict_free_func dict_set_free __P((dict_free_func func)); - -typedef int (*dict_cmp_func) __P((const void *, const void *)); -typedef void (*dict_del_func) __P((void *)); -typedef int (*dict_vis_func) __P((const void *, void *)); -typedef unsigned (*dict_hsh_func) __P((const void *)); - -typedef struct dict dict; -typedef struct dict_itor dict_itor; - -struct dict { - void *_object; - int (*_insert) __P((void *obj, void *k, void *d, int ow)); - int (*_probe) __P((void *obj, void *key, void **dat)); - void *(*_search) __P((void *obj, const void *k)); - const void *(*_csearch) __P((const void *obj, const void *k)); - int (*_remove) __P((void *obj, const void *key, int del)); - void (*_walk) __P((void *obj, dict_vis_func func)); - unsigned (*_count) __P((const void *obj)); - void (*_empty) __P((void *obj, int del)); - void (*_destroy) __P((void *obj, int del)); - dict_itor *(*_inew) __P((void *obj)); -}; - -#define dict_private(dct) (dct)->_object -#define dict_insert(dct,k,d,o) (dct)->_insert((dct)->_object, (k), (d), (o)) -#define dict_probe(dct,k,d) (dct)->_probe((dct)->_object, (k), (d)) -#define dict_search(dct,k) (dct)->_search((dct)->_object, (k)) -#define dict_csearch(dct,k) (dct)->_csearch((dct)->_object, (k)) -#define dict_remove(dct,k,del) (dct)->_remove((dct)->_object, (k), (del)) -#define dict_walk(dct,f) (dct)->_walk((dct)->_object, (f)) -#define dict_count(dct) (dct)->_count((dct)->_object) -#define dict_empty(dct,d) (dct)->_empty((dct)->_object, (d)) -void dict_destroy __P((dict *dct, int del)); -#define dict_itor_new(dct) (dct)->_inew((dct)->_object) - -struct dict_itor { - void *_itor; - int (*_valid) __P((const void *itor)); - void (*_invalid) __P((void *itor)); - int (*_next) __P((void *itor)); - int (*_prev) __P((void *itor)); - int (*_nextn) __P((void *itor, unsigned count)); - int (*_prevn) __P((void *itor, unsigned count)); - int (*_first) __P((void *itor)); - int (*_last) __P((void *itor)); - int (*_search) __P((void *itor, const void *key)); - const void *(*_key) __P((void *itor)); - void *(*_data) __P((void *itor)); - const void *(*_cdata) __P((const void *itor)); - int (*_setdata) __P((void *itor, void *dat, int del)); - int (*_remove) __P((void *itor, int del)); - int (*_compare) __P((void *itor1, void *itor2)); - void (*_destroy) __P((void *itor)); -}; - -#define dict_itor_private(i) (i)->_itor -#define dict_itor_valid(i) (i)->_valid((i)->_itor) -#define dict_itor_invalidate(i) (i)->_invalid((i)->_itor) -#define dict_itor_next(i) (i)->_next((i)->_itor) -#define dict_itor_prev(i) (i)->_prev((i)->_itor) -#define dict_itor_nextn(i,n) (i)->_nextn((i)->_itor, (n)) -#define dict_itor_prevn(i,n) (i)->_prevn((i)->_itor, (n)) -#define dict_itor_first(i) (i)->_first((i)->_itor) -#define dict_itor_last(i) (i)->_last((i)->_itor) -#define dict_itor_search(i,k) (i)->_search((i)->_itor, (k)) -#define dict_itor_key(i) (i)->_key((i)->_itor) -#define dict_itor_data(i) (i)->_data((i)->_itor) -#define dict_itor_cdata(i) (i)->_cdata((i)->_itor) -#define dict_itor_set_data(i,dat,d) (i)->_setdata((i)->_itor, (dat), (d)) -#define dict_itor_remove(i) (i)->_remove((i)->_itor) -void dict_itor_destroy __P((dict_itor *itor)); - -int dict_int_cmp __P((const void *k1, const void *k2)); -int dict_uint_cmp __P((const void *k1, const void *k2)); -int dict_long_cmp __P((const void *k1, const void *k2)); -int dict_ulong_cmp __P((const void *k1, const void *k2)); -int dict_ptr_cmp __P((const void *k1, const void *k2)); -int dict_str_cmp __P((const void *k1, const void *k2)); - -END_DECL - -/*#include "hashtable.h"*/ -#include "hb_tree.h" -/*#include "pr_tree.h" -#include "rb_tree.h" -#include "sp_tree.h" -#include "tr_tree.h" -#include "wb_tree.h"*/ - -#endif /* !_DICT_H_ */ diff --git a/ompi/mca/coll/libnbc/dict_private.h b/ompi/mca/coll/libnbc/dict_private.h deleted file mode 100644 index f1fb5b5d58..0000000000 --- a/ompi/mca/coll/libnbc/dict_private.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * dict_private.h - * - * Private definitions for libdict. - * Copyright (C) 2001 Farooq Mela. - * - * $Id: dict_private.h,v 1.8 2002/01/02 09:14:11 farooq Exp $ - */ - -#ifndef _DICT_PRIVATE_H_ -#define _DICT_PRIVATE_H_ - -#include "dict.h" - -typedef int (*insert_func) __P((void *, void *k, void *d, int o)); -typedef int (*probe_func) __P((void *, void *k, void **d)); -typedef void *(*search_func) __P((void *, const void *k)); -typedef const void *(*csearch_func) __P((const void *, const void *k)); -typedef int (*remove_func) __P((void *, const void *k, int d)); -typedef void (*walk_func) __P((void *, dict_vis_func visit)); -typedef unsigned (*count_func) __P((const void *)); -typedef void (*empty_func) __P((void *, int del)); -typedef void (*destroy_func) __P((void *, int del)); -typedef dict_itor *(*inew_func) __P((void *)); - -typedef void (*idestroy_func) __P((void *)); -typedef int (*valid_func) __P((const void *)); -typedef void (*invalidate_func) __P((void *)); -typedef int (*next_func) __P((void *)); -typedef int (*prev_func) __P((void *)); -typedef int (*nextn_func) __P((void *, unsigned count)); -typedef int (*prevn_func) __P((void *, unsigned count)); -typedef int (*first_func) __P((void *)); -typedef int (*last_func) __P((void *)); -typedef int (*isearch_func) __P((void *, const void *k)); -typedef const void *(*key_func) __P((void *)); -typedef void *(*data_func) __P((void *)); -typedef const void *(*cdata_func) __P((const void *)); -typedef int (*dataset_func) __P((void *, void *d, int del)); -typedef int (*iremove_func) __P((void *, int del)); -typedef int (*icompare_func) __P((void *, void *itor2)); - -#ifndef NDEBUG -# include -# undef ASSERT -# if defined(__GNUC__) -# define ASSERT(expr) \ - if (!(expr)) \ - fprintf(stderr, "\n%s:%d (%s) assertion failed: `%s'\n", \ - __FILE__, __LINE__, __PRETTY_FUNCTION__, #expr), \ - abort() -# else -# define ASSERT(expr) \ - if (!(expr)) \ - fprintf(stderr, "\n%s:%d assertion failed: `%s'\n", \ - __FILE__, __LINE__, #expr), \ - abort() -# endif -#else -# define ASSERT(expr) -#endif - -extern dict_malloc_func _dict_malloc; -extern dict_free_func _dict_free; -#define MALLOC(n) (*_dict_malloc)(n) -#define FREE(p) (*_dict_free)(p) - -#define ABS(a) ((a) < 0 ? -(a) : +(a)) -#define MIN(a,b) ((a) < (b) ? (a) : (b)) -#define MAX(a,b) ((a) > (b) ? (a) : (b)) -#define SWAP(a,b,v) v = (a), (a) = (b), (b) = v -#define UNUSED(p) (void)&p - -#if defined(__GNUC__) -# define GCC_INLINE __inline__ -# define GCC_UNUSED __attribute__((__unused__)) -# define GCC_CONST __attribute__((__const__)) -#else -# define GCC_INLINE -# define GCC_UNUSED -# define GCC_CONST -#endif - -#endif /* !_DICT_PRIVATE_H_ */ diff --git a/ompi/mca/coll/libnbc/hb_tree.c b/ompi/mca/coll/libnbc/hb_tree.c deleted file mode 100644 index 780ffc6030..0000000000 --- a/ompi/mca/coll/libnbc/hb_tree.c +++ /dev/null @@ -1,906 +0,0 @@ -/* - * hb_tree.c - * - * Implementation of height balanced tree. - * Copyright (C) 2001-2004 Farooq Mela. - * - * $Id: hb_tree.c,v 1.10 2001/11/25 08:30:21 farooq Exp farooq $ - * - * cf. [Gonnet 1984], [Knuth 1998] - */ - -#include - -#include "hb_tree.h" -#include "dict_private.h" - -typedef signed char balance_t; - -typedef struct hb_node hb_node; - -struct hb_node { - void *key; - void *dat; - hb_node *parent; - hb_node *llink; - hb_node *rlink; - balance_t bal; -}; - -struct hb_tree { - hb_node *root; - unsigned count; - dict_cmp_func key_cmp; - dict_del_func key_del; - dict_del_func dat_del; -}; - -struct hb_itor { - hb_tree *tree; - hb_node *node; -}; - -static int rot_left __P((hb_tree *tree, hb_node *node)); -static int rot_right __P((hb_tree *tree, hb_node *node)); -static unsigned node_height __P((const hb_node *node)); -static unsigned node_mheight __P((const hb_node *node)); -static unsigned node_pathlen __P((const hb_node *node, unsigned level)); -static hb_node *node_new __P((void *key, void *dat)); -static hb_node *node_min __P((hb_node *node)); -static hb_node *node_max __P((hb_node *node)); -static hb_node *node_next __P((hb_node *node)); -static hb_node *node_prev __P((hb_node *node)); - -hb_tree * -hb_tree_new(dict_cmp_func key_cmp, dict_del_func key_del, - dict_del_func dat_del) -{ - hb_tree *tree; - - if ((tree = MALLOC(sizeof(*tree))) == NULL) - return NULL; - - tree->root = NULL; - tree->count = 0; - tree->key_cmp = key_cmp ? key_cmp : dict_ptr_cmp; - tree->key_del = key_del; - tree->dat_del = dat_del; - - return tree; -} - -dict * -hb_dict_new(dict_cmp_func key_cmp, dict_del_func key_del, - dict_del_func dat_del) -{ - dict *dct; - hb_tree *tree; - - if ((dct = MALLOC(sizeof(*dct))) == NULL) - return NULL; - - if ((tree = hb_tree_new(key_cmp, key_del, dat_del)) == NULL) { - FREE(dct); - return NULL; - } - - dct->_object = tree; - dct->_inew = (inew_func)hb_dict_itor_new; - dct->_destroy = (destroy_func)hb_tree_destroy; - dct->_insert = (insert_func)hb_tree_insert; - dct->_probe = (probe_func)hb_tree_probe; - dct->_search = (search_func)hb_tree_search; - dct->_csearch = (csearch_func)hb_tree_csearch; - dct->_remove = (remove_func)hb_tree_remove; - dct->_empty = (empty_func)hb_tree_empty; - dct->_walk = (walk_func)hb_tree_walk; - dct->_count = (count_func)hb_tree_count; - - return dct; -} - -void -hb_tree_destroy(hb_tree *tree, int del) -{ - ASSERT(tree != NULL); - - if (tree->root) - hb_tree_empty(tree, del); - - FREE(tree); -} - -void -hb_tree_empty(hb_tree *tree, int del) -{ - hb_node *node, *parent; - - ASSERT(tree != NULL); - - node = tree->root; - - while (node) { - if (node->llink || node->rlink) { - node = node->llink ? node->llink : node->rlink; - continue; - } - - if (del) { - if (tree->key_del) - tree->key_del(node->key); - if (tree->dat_del) - tree->dat_del(node->dat); - } - - parent = node->parent; - FREE(node); - - if (parent) { - if (parent->llink == node) - parent->llink = NULL; - else - parent->rlink = NULL; - } - node = parent; - } - - tree->root = NULL; - tree->count = 0; -} - -void * -hb_tree_search(hb_tree *tree, const void *key) -{ - int rv; - hb_node *node; - - ASSERT(tree != NULL); - - node = tree->root; - while (node) { - rv = tree->key_cmp(key, node->key); - if (rv < 0) - node = node->llink; - else if (rv > 0) - node = node->rlink; - else - return node->dat; - } - - return NULL; -} - -const void * -hb_tree_csearch(const hb_tree *tree, const void *key) -{ - return hb_tree_csearch((hb_tree *)tree, key); -} - -int -hb_tree_insert(hb_tree *tree, void *key, void *dat, int overwrite) -{ - int rv = 0; - hb_node *node, *parent = NULL, *q = NULL; - - ASSERT(tree != NULL); - - node = tree->root; - while (node) { - rv = tree->key_cmp(key, node->key); - if (rv < 0) - parent = node, node = node->llink; - else if (rv > 0) - parent = node, node = node->rlink; - else { - if (overwrite == 0) - return 1; - if (tree->key_del) - tree->key_del(node->key); - if (tree->dat_del) - tree->dat_del(node->dat); - node->key = key; - node->dat = dat; - return 0; - } - if (parent->bal) - q = parent; - } - - if ((node = node_new(key, dat)) == NULL) - return -1; - if ((node->parent = parent) == NULL) { - tree->root = node; - ASSERT(tree->count == 0); - tree->count = 1; - return 0; - } - if (rv < 0) - parent->llink = node; - else - parent->rlink = node; - - while (parent != q) { - parent->bal = (parent->rlink == node) * 2 - 1; - node = parent; - parent = node->parent; - } - if (q) { - if (q->llink == node) { - if (--q->bal == -2) { - if (q->llink->bal > 0) - rot_left(tree, q->llink); - rot_right(tree, q); - } - } else { - if (++q->bal == +2) { - if (q->rlink->bal < 0) - rot_right(tree, q->rlink); - rot_left(tree, q); - } - } - } - tree->count++; - return 0; -} - -int -hb_tree_probe(hb_tree *tree, void *key, void **dat) -{ - int rv = 0; - hb_node *node, *parent = NULL, *q = NULL; - - ASSERT(tree != NULL); - - node = tree->root; - while (node) { - rv = tree->key_cmp(key, node->key); - if (rv < 0) - parent = node, node = node->llink; - else if (rv > 0) - parent = node, node = node->rlink; - else { - *dat = node->dat; - return 0; - } - if (parent->bal) - q = parent; - } - - if ((node = node_new(key, *dat)) == NULL) - return -1; - if ((node->parent = parent) == NULL) { - tree->root = node; - ASSERT(tree->count == 0); - tree->count = 1; - return 1; - } - if (rv < 0) - parent->llink = node; - else - parent->rlink = node; - - while (parent != q) { - parent->bal = (parent->rlink == node) * 2 - 1; - node = parent; - parent = parent->parent; - } - if (q) { - if (q->llink == node) { - if (--q->bal == -2) { - if (q->llink->bal > 0) - rot_left(tree, q->llink); - rot_right(tree, q); - } - } else { - if (++q->bal == +2) { - if (q->rlink->bal < 0) - rot_right(tree, q->rlink); - rot_left(tree, q); - } - } - } - tree->count++; - return 1; -} - -#define FREE_NODE(n) \ - if (del) { \ - if (tree->key_del) \ - tree->key_del((n)->key); \ - if (tree->dat_del) \ - tree->dat_del((n)->dat); \ - } \ - FREE(n) - -int -hb_tree_remove(hb_tree *tree, const void *key, int del) -{ - int rv, left; - hb_node *node, *out, *parent = NULL; - void *tmp; - - ASSERT(tree != NULL); - - node = tree->root; - while (node) { - rv = tree->key_cmp(key, node->key); - if (rv == 0) - break; - parent = node; - node = rv < 0 ? node->llink : node->rlink; - } - if (node == NULL) - return -1; - - if (node->llink && node->rlink) { - for (out = node->rlink; out->llink; out = out->llink) - /* void */; - SWAP(node->key, out->key, tmp); - SWAP(node->dat, out->dat, tmp); - node = out; - parent = out->parent; - } - - out = node->llink ? node->llink : node->rlink; - FREE_NODE(node); - if (out) - out->parent = parent; - if (parent == NULL) { - tree->root = out; - tree->count--; - return 0; - } - - left = parent->llink == node; - if (left) - parent->llink = out; - else - parent->rlink = out; - - for (;;) { - if (left) { - if (++parent->bal == 0) { - node = parent; - goto higher; - } - if (parent->bal == +2) { - ASSERT(parent->rlink != NULL); - if (parent->rlink->bal < 0) { - rot_right(tree, parent->rlink); - rot_left(tree, parent); - } else { - ASSERT(parent->rlink->rlink != NULL); - if (rot_left(tree, parent) == 0) - break; - } - } else { - break; - } - } else { - if (--parent->bal == 0) { - node = parent; - goto higher; - } - if (parent->bal == -2) { - ASSERT(parent->llink != NULL); - if (parent->llink->bal > 0) { - rot_left(tree, parent->llink); - rot_right(tree, parent); - } else { - ASSERT(parent->llink->llink != NULL); - if (rot_right(tree, parent) == 0) - break; - } - } else { - break; - } - } - - /* Only get here on double rotations or single rotations that changed - * subtree height - in either event, `parent->parent' is positioned - * where `parent' was positioned before any rotations. */ - node = parent->parent; -higher: - if ((parent = node->parent) == NULL) - break; - left = parent->llink == node; - } - tree->count--; - return 0; -} - -const void * -hb_tree_min(const hb_tree *tree) -{ - const hb_node *node; - - ASSERT(tree != NULL); - - if (tree->root == NULL) - return NULL; - - for (node = tree->root; node->llink; node = node->llink) - /* void */; - return node->key; -} - -const void * -hb_tree_max(const hb_tree *tree) -{ - const hb_node *node; - - ASSERT(tree != NULL); - - if ((node = tree->root) == NULL) - return NULL; - - for (; node->rlink; node = node->rlink) - /* void */; - return node->key; -} - -void -hb_tree_walk(hb_tree *tree, dict_vis_func visit) -{ - hb_node *node; - - ASSERT(tree != NULL); - - if (tree->root == NULL) - return; - for (node = node_min(tree->root); node; node = node_next(node)) - if (visit(node->key, node->dat) == 0) - break; -} - -unsigned -hb_tree_count(const hb_tree *tree) -{ - ASSERT(tree != NULL); - - return tree->count; -} - -unsigned -hb_tree_height(const hb_tree *tree) -{ - ASSERT(tree != NULL); - - return tree->root ? node_height(tree->root) : 0; -} - -unsigned -hb_tree_mheight(const hb_tree *tree) -{ - ASSERT(tree != NULL); - - return tree->root ? node_mheight(tree->root) : 0; -} - -unsigned -hb_tree_pathlen(const hb_tree *tree) -{ - ASSERT(tree != NULL); - - return tree->root ? node_pathlen(tree->root, 1) : 0; -} - -static hb_node * -node_new(void *key, void *dat) -{ - hb_node *node; - - if ((node = MALLOC(sizeof(*node))) == NULL) - return NULL; - - node->key = key; - node->dat = dat; - node->parent = NULL; - node->llink = NULL; - node->rlink = NULL; - node->bal = 0; - - return node; -} - -static hb_node * -node_min(hb_node *node) -{ - ASSERT(node != NULL); - - while (node->llink) - node = node->llink; - return node; -} - -static hb_node * -node_max(hb_node *node) -{ - ASSERT(node != NULL); - - while (node->rlink) - node = node->rlink; - return node; -} - -static hb_node * -node_next(hb_node *node) -{ - hb_node *temp; - - ASSERT(node != NULL); - - if (node->rlink) { - for (node = node->rlink; node->llink; node = node->llink) - /* void */; - return node; - } - temp = node->parent; - while (temp && temp->rlink == node) { - node = temp; - temp = temp->parent; - } - return temp; -} - -static hb_node * -node_prev(hb_node *node) -{ - hb_node *temp; - - ASSERT(node != NULL); - - if (node->llink) { - for (node = node->llink; node->rlink; node = node->rlink) - /* void */; - return node; - } - temp = node->parent; - while (temp && temp->llink == node) { - node = temp; - temp = temp->parent; - } - return temp; -} - -static unsigned -node_height(const hb_node *node) -{ - unsigned l, r; - - ASSERT(node != NULL); - - l = node->llink ? node_height(node->llink) + 1 : 0; - r = node->rlink ? node_height(node->rlink) + 1 : 0; - return MAX(l, r); -} - -static unsigned -node_mheight(const hb_node *node) -{ - unsigned l, r; - - ASSERT(node != NULL); - - l = node->llink ? node_mheight(node->llink) + 1 : 0; - r = node->rlink ? node_mheight(node->rlink) + 1 : 0; - return MIN(l, r); -} - -static unsigned -node_pathlen(const hb_node *node, unsigned level) -{ - unsigned n = 0; - - ASSERT(node != NULL); - - if (node->llink) - n += level + node_pathlen(node->llink, level + 1); - if (node->rlink) - n += level + node_pathlen(node->rlink, level + 1); - return n; -} - -/* - * rot_left(T, B): - * - * / / - * B D - * / \ / \ - * A D ==> B E - * / \ / \ - * C E A C - * - */ -static int -rot_left(hb_tree *tree, hb_node *node) -{ - int hc; - hb_node *rlink, *parent; - - ASSERT(tree != NULL); - ASSERT(node != NULL); - ASSERT(node->rlink != NULL); - - rlink = node->rlink; - node->rlink = rlink->llink; - if (rlink->llink) - rlink->llink->parent = node; - parent = node->parent; - rlink->parent = parent; - if (parent) { - if (parent->llink == node) - parent->llink = rlink; - else - parent->rlink = rlink; - } else { - tree->root = rlink; - } - rlink->llink = node; - node->parent = rlink; - - hc = rlink->bal != 0; - node->bal -= 1 + MAX(rlink->bal, 0); - rlink->bal -= 1 - MIN(node->bal, 0); - return hc; -} - -/* - * rot_right(T, D): - * - * / / - * D B - * / \ / \ - * B E ==> A D - * / \ / \ - * A C C E - * - */ -static int -rot_right(hb_tree *tree, hb_node *node) -{ - int hc; - hb_node *llink, *parent; - - ASSERT(tree != NULL); - ASSERT(node != NULL); - ASSERT(node->llink != NULL); - - llink = node->llink; - node->llink = llink->rlink; - if (llink->rlink) - llink->rlink->parent = node; - parent = node->parent; - llink->parent = parent; - if (parent) { - if (parent->llink == node) - parent->llink = llink; - else - parent->rlink = llink; - } else { - tree->root = llink; - } - llink->rlink = node; - node->parent = llink; - - hc = llink->bal != 0; - node->bal += 1 - MIN(llink->bal, 0); - llink->bal += 1 + MAX(node->bal, 0); - return hc; -} - -hb_itor * -hb_itor_new(hb_tree *tree) -{ - hb_itor *itor; - - ASSERT(tree != NULL); - - if ((itor = MALLOC(sizeof(*itor))) == NULL) - return NULL; - - itor->tree = tree; - hb_itor_first(itor); - return itor; -} - -dict_itor * -hb_dict_itor_new(hb_tree *tree) -{ - dict_itor *itor; - - ASSERT(tree != NULL); - - if ((itor = MALLOC(sizeof(*itor))) == NULL) - return NULL; - - if ((itor->_itor = hb_itor_new(tree)) == NULL) { - FREE(itor); - return NULL; - } - - itor->_destroy = (idestroy_func)hb_itor_destroy; - itor->_valid = (valid_func)hb_itor_valid; - itor->_invalid = (invalidate_func)hb_itor_invalidate; - itor->_next = (next_func)hb_itor_next; - itor->_prev = (prev_func)hb_itor_prev; - itor->_nextn = (nextn_func)hb_itor_nextn; - itor->_prevn = (prevn_func)hb_itor_prevn; - itor->_first = (first_func)hb_itor_first; - itor->_last = (last_func)hb_itor_last; - itor->_search = (isearch_func)hb_itor_search; - itor->_key = (key_func)hb_itor_key; - itor->_data = (data_func)hb_itor_data; - itor->_cdata = (cdata_func)hb_itor_cdata; - itor->_setdata = (dataset_func)hb_itor_set_data; - - return itor; -} - -void -hb_itor_destroy(hb_itor *itor) -{ - ASSERT(itor != NULL); - - FREE(itor); -} - -#define RETVALID(itor) return itor->node != NULL - -int -hb_itor_valid(const hb_itor *itor) -{ - ASSERT(itor != NULL); - - RETVALID(itor); -} - -void -hb_itor_invalidate(hb_itor *itor) -{ - ASSERT(itor != NULL); - - itor->node = NULL; -} - -int -hb_itor_next(hb_itor *itor) -{ - ASSERT(itor != NULL); - - if (itor->node == NULL) - hb_itor_first(itor); - else - itor->node = node_next(itor->node); - RETVALID(itor); -} - -int -hb_itor_prev(hb_itor *itor) -{ - ASSERT(itor != NULL); - - if (itor->node == NULL) - hb_itor_last(itor); - else - itor->node = node_prev(itor->node); - RETVALID(itor); -} - -int -hb_itor_nextn(hb_itor *itor, unsigned count) -{ - ASSERT(itor != NULL); - - if (count) { - if (itor->node == NULL) { - hb_itor_first(itor); - count--; - } - - while (count-- && itor->node) - itor->node = node_next(itor->node); - } - - RETVALID(itor); -} - -int -hb_itor_prevn(hb_itor *itor, unsigned count) -{ - ASSERT(itor != NULL); - - if (count) { - if (itor->node == NULL) { - hb_itor_last(itor); - count--; - } - - while (count-- && itor->node) - itor->node = node_prev(itor->node); - } - - RETVALID(itor); -} - -int -hb_itor_first(hb_itor *itor) -{ - hb_tree *t; - - ASSERT(itor != NULL); - - t = itor->tree; - itor->node = t->root ? node_min(t->root) : NULL; - RETVALID(itor); -} - -int -hb_itor_last(hb_itor *itor) -{ - hb_tree *t; - - ASSERT(itor != NULL); - - t = itor->tree; - itor->node = t->root ? node_max(t->root) : NULL; - RETVALID(itor); -} - -int -hb_itor_search(hb_itor *itor, const void *key) -{ - int rv; - hb_node *node; - dict_cmp_func cmp; - - ASSERT(itor != NULL); - - cmp = itor->tree->key_cmp; - for (node = itor->tree->root; node;) { - rv = cmp(key, node->key); - if (rv == 0) - break; - node = rv < 0 ? node->llink : node->rlink; - } - itor->node = node; - RETVALID(itor); -} - -const void * -hb_itor_key(const hb_itor *itor) -{ - ASSERT(itor != NULL); - - return itor->node ? itor->node->key : NULL; -} - -void * -hb_itor_data(hb_itor *itor) -{ - ASSERT(itor != NULL); - - return itor->node ? itor->node->dat : NULL; -} - -const void * -hb_itor_cdata(const hb_itor *itor) -{ - ASSERT(itor != NULL); - - return itor->node ? itor->node->dat : NULL; -} - -int -hb_itor_set_data(hb_itor *itor, void *dat, int del) -{ - ASSERT(itor != NULL); - - if (itor->node == NULL) - return -1; - - if (del && itor->tree->dat_del) - itor->tree->dat_del(itor->node->dat); - itor->node->dat = dat; - return 0; -} diff --git a/ompi/mca/coll/libnbc/hb_tree.h b/ompi/mca/coll/libnbc/hb_tree.h deleted file mode 100644 index 2de8af6d19..0000000000 --- a/ompi/mca/coll/libnbc/hb_tree.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * hb_tree.h - * - * Interface for height balanced tree. - * Copyright (C) 2001 Farooq Mela. - * - * $Id: hb_tree.h,v 1.2 2001/09/10 06:46:41 farooq Exp $ - */ - -#ifndef _HB_TREE_H_ -#define _HB_TREE_H_ - -#include "dict.h" - -BEGIN_DECL - -struct hb_tree; -typedef struct hb_tree hb_tree; - -hb_tree *hb_tree_new __P((dict_cmp_func key_cmp, dict_del_func key_del, - dict_del_func dat_del)); -dict *hb_dict_new __P((dict_cmp_func key_cmp, dict_del_func key_del, - dict_del_func dat_del)); -void hb_tree_destroy __P((hb_tree *tree, int del)); - -int hb_tree_insert __P((hb_tree *tree, void *key, void *dat, int overwrite)); -int hb_tree_probe __P((hb_tree *tree, void *key, void **dat)); -void *hb_tree_search __P((hb_tree *tree, const void *key)); -const void *hb_tree_csearch __P((const hb_tree *tree, const void *key)); -int hb_tree_remove __P((hb_tree *tree, const void *key, int del)); -void hb_tree_empty __P((hb_tree *tree, int del)); -void hb_tree_walk __P((hb_tree *tree, dict_vis_func visit)); -unsigned hb_tree_count __P((const hb_tree *tree)); -unsigned hb_tree_height __P((const hb_tree *tree)); -unsigned hb_tree_mheight __P((const hb_tree *tree)); -unsigned hb_tree_pathlen __P((const hb_tree *tree)); -const void *hb_tree_min __P((const hb_tree *tree)); -const void *hb_tree_max __P((const hb_tree *tree)); - -struct hb_itor; -typedef struct hb_itor hb_itor; - -hb_itor *hb_itor_new __P((hb_tree *tree)); -dict_itor *hb_dict_itor_new __P((hb_tree *tree)); -void hb_itor_destroy __P((hb_itor *tree)); - -int hb_itor_valid __P((const hb_itor *itor)); -void hb_itor_invalidate __P((hb_itor *itor)); -int hb_itor_next __P((hb_itor *itor)); -int hb_itor_prev __P((hb_itor *itor)); -int hb_itor_nextn __P((hb_itor *itor, unsigned count)); -int hb_itor_prevn __P((hb_itor *itor, unsigned count)); -int hb_itor_first __P((hb_itor *itor)); -int hb_itor_last __P((hb_itor *itor)); -int hb_itor_search __P((hb_itor *itor, const void *key)); -const void *hb_itor_key __P((const hb_itor *itor)); -void *hb_itor_data __P((hb_itor *itor)); -const void *hb_itor_cdata __P((const hb_itor *itor)); -int hb_itor_set_data __P((hb_itor *itor, void *dat, int del)); -int hb_itor_remove __P((hb_itor *itor, int del)); - -END_DECL - -#endif /* !_HB_TREE_H_ */ diff --git a/ompi/mca/coll/libnbc/ib.c b/ompi/mca/coll/libnbc/ib.c deleted file mode 100644 index 55dcc7944f..0000000000 --- a/ompi/mca/coll/libnbc/ib.c +++ /dev/null @@ -1,1142 +0,0 @@ -#define IB -#include "ib.h" - -double t; - -/* is IB initialized? */ -static int IB_Ginitialized=0; -/* the keyval (global) */ -static int IB_Gkeyval=MPI_KEYVAL_INVALID; - -static struct { - VAPI_hca_hndl_t hca_hndl; - VAPI_pd_hndl_t pd_hndl; - - hb_tree *memlist; /* this is the libdict structure to hang off the search tree */ -} IB_Hca_info; - -/* function definitions */ -static __inline__ void IB_Memlist_memlist_delete(IB_Memlistel *entry); -static __inline__ void IB_Memlist_delete_key(IB_Memlistel *k); -static __inline__ int IB_Memlist_compare_entries(IB_Memlistel *a, IB_Memlistel *b, void *param); -static __inline__ void IB_Taglist_delete(IB_Taglstel *entry); -static __inline__ void IB_Taglist_delete_key(IB_Taglstel *k); -static __inline__ int IB_Taglist_compare_entries(IB_Taglstel *a, IB_Taglstel *b, void *param); - -#if 0 -static __inline__ int IB_Addtotaglst(IB_Comminfo *comminfo, int tag, IB_Req *req, int peer) { - IB_Taglstel *new; - - new = malloc(sizeof(IB_Taglstel)); - new->tag = tag; - new->peer = peer; - new->req = req; - new->next = NULL; - - /* first element in list */ - if(comminfo->taglistend == NULL) { - comminfo->taglisthead = new; - } else { - comminfo->taglistend->next = new; - } - //printf("added tag %i/peer %i as new element to taglist\n", tag, peer); - comminfo->taglistend = new; - - return IB_OK; -} - -static __inline__ IB_Req *IB_Gettagreq(IB_Comminfo *comminfo, int tag, int peer) { - IB_Taglstel *ptr, *tmp; - IB_Req *req; - - /* empty list */ - if(comminfo->taglisthead == NULL) return NULL; - - ptr = comminfo->taglisthead; - /* tag/peer is in first element */ - if((ptr->tag == tag) && (ptr->peer == peer)) { - req = comminfo->taglisthead->req; - comminfo->taglisthead = ptr->next; - /* list is empty */ - if(comminfo->taglisthead == NULL) comminfo->taglistend = NULL; - free(ptr); - //printf("removed tag %i\n", tag); - return req; - } - - while ((ptr->next != NULL)) { - if((ptr->next->tag == tag) && (ptr->next->peer == peer)) break; - ptr = ptr->next; - } - - if(ptr->next == NULL) - /* we did not find it */ - return NULL; - else { - /* we found it somewhere in the middle */ - req = ptr->next->req; - tmp = ptr->next; - ptr->next = ptr->next->next; - /* it was the last element */ - if(ptr->next == NULL) comminfo->taglistend = ptr; - free(tmp); - //printf("removed tag %i\n", tag); - return req; - } -} -#endif - -static int IB_Create_qp( int rank, int remote, VAPI_hca_hndl_t *hca_hndl_p, VAPI_cq_hndl_t *sr_cq_hndl_p, VAPI_cq_hndl_t *rr_cq_hndl_p, VAPI_qp_hndl_t *qp_hndl_p, VAPI_pd_hndl_t *pd_hndp_p, MPI_Comm comm); - -static int IB_Key_copy(MPI_Comm oldcomm, int keyval, void *extra_state, void *attribute_val_in, void *attribute_val_out, int *flag) { - /* delete the attribute in the new comm - it will be created at the - * first usage */ - *flag = 0; - - return MPI_SUCCESS; -} - -static int IB_Key_delete(MPI_Comm comm, int keyval, void *attribute_val, void *extra_state) { - IB_Comminfo *comminfo; - - if(keyval == IB_Gkeyval) { - comminfo=(IB_Comminfo*)attribute_val; - free(comminfo); - } else { - printf("Got wrong keyval!(%i)\n", keyval); - } - - return MPI_SUCCESS; -} - -static __inline__ void IB_stat( VAPI_ret_t ret, char *string ) { - int rank; - - if( ret != VAPI_OK) { - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); - printf("[%u]: *** [ERROR] *** %s -> %s (%s)\n", rank, string, VAPI_strerror( ret ), VAPI_strerror_sym( ret )); -// return; - MPI_Finalize(); - exit(1); - } -} - -static __inline__ void IB_CQ_stat( VAPI_wc_desc_t ret, char *string ) { - int rank; - - if(ret.status != VAPI_SUCCESS) { - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); - printf("[%u]: *** [ERROR] *** %s -> %s\n", rank, string, VAPI_wc_status_sym( ret.status )); - printf("[%u]: *** [ERROR] *** opcode -> %s\n", rank, VAPI_cqe_opcode_sym(ret.opcode)); -// return; - MPI_Finalize(); - exit(1); - } -} - -static int IB_Init() { - u_int32_t num_of_hcas; /* actual number of hcas */ - VAPI_hca_id_t *hca_id_buf_p; /* HCA result buffer */ - int ret, res; - - /* keyval is not initialized yet, we have to init it */ - if(MPI_KEYVAL_INVALID == IB_Gkeyval) { - res = MPI_Keyval_create(IB_Key_copy, IB_Key_delete, &(IB_Gkeyval), NULL); - if((MPI_SUCCESS != res)) { printf("Error in MPI_Keyval_create() (%i)\n", res); return IB_OOR; } - } - - hca_id_buf_p = malloc(sizeof(VAPI_hca_id_t) * 2); - - /* get all HCAs */ - ret = EVAPI_list_hcas( (u_int32_t)2, - &num_of_hcas, - hca_id_buf_p ); - if( ret == VAPI_OK) { - fprintf(stderr, "[INFO] found %d adapter(s), first-name: %s\n", num_of_hcas, (char *)hca_id_buf_p); - } else { - IB_stat( ret, "EVAPI_list_hcas()" ); - } - - /* get handle of first HCA */ - ret = EVAPI_get_hca_hndl( *hca_id_buf_p, &IB_Hca_info.hca_hndl ); - IB_stat( ret, "EVAPI_get_hca_hndl()" ); - - // allocate PD - ret = VAPI_alloc_pd( IB_Hca_info.hca_hndl, - &IB_Hca_info.pd_hndl ); - IB_stat( ret, "VAPI_alloc_pd()" ); - - IB_Hca_info.memlist = hb_tree_new((dict_cmp_func)IB_Memlist_compare_entries, (void *) IB_Memlist_delete_key, (void *)IB_Memlist_memlist_delete); - if(IB_Hca_info.memlist == NULL) { printf("error in hb_dict_new()\n"); return IB_OOR; } - - IB_Ginitialized = 1; - - return IB_OK; -} - -static __inline__ IB_Comminfo *IB_Comm_init(MPI_Comm comm) { - IB_Comminfo *comminfo; - int res, flag; - - if(!IB_Ginitialized) IB_Init(); - - res = MPI_Attr_get(comm, IB_Gkeyval, &comminfo, &flag); - if((MPI_SUCCESS != res)) { printf("Error in MPI_Attr_get() (%i)\n", res); return NULL; } - if (!flag) { - VAPI_cqe_num_t num_of_entries_p; /* # CQ entries */ - VAPI_mrw_t req_mrw_p; - VAPI_mrw_t rep_mrw_p; /* responded memory region */ - IB_Peer_info *a2abuf1, *a2abuf2, *a2abuf3; - int p, i, j, rank; - - res = MPI_Comm_size(comm, &p); - res = MPI_Comm_rank(comm, &rank); - - /* we have to create a new one */ - comminfo = malloc(sizeof(IB_Comminfo)); - if(comminfo == NULL) { printf("Error in malloc()\n"); return NULL; } - - comminfo->taglisthead = NULL; - comminfo->taglistend = NULL; - - //printf("[%i] build up %i connections in comm %p \n", rank, p-1, comm); - /* allocate QPs */ - comminfo->qp_hndl_arr = malloc(p*sizeof(VAPI_qp_hndl_t)); - if(comminfo->qp_hndl_arr == NULL) { printf("malloc() error\n"); return NULL; } - /* allocate SR CQs */ - comminfo->sr_cq_hndl_arr = malloc(p*sizeof(VAPI_cq_hndl_t)); - if(comminfo->sr_cq_hndl_arr == NULL) { printf("malloc() error\n"); return NULL; } - /* allocate RR CQs */ - comminfo->rr_cq_hndl_arr = malloc(p*sizeof(VAPI_cq_hndl_t)); - if(comminfo->rr_cq_hndl_arr == NULL) { printf("malloc() error\n"); return NULL; } - /* allocate a tag list for each host */ - comminfo->taglist=malloc(p*sizeof(hb_tree*)); - if(comminfo->taglist == NULL) { printf("malloc() error\n"); return NULL; } - for(i=0; itaglist[i] = hb_tree_new((dict_cmp_func)IB_Taglist_compare_entries, (void *) IB_Taglist_delete_key, (void *)IB_Taglist_delete); - if(comminfo->taglist[i] == NULL) { printf("hb_tree_new() error\n"); return NULL; } - } - - /* allocate rtr send queue */ - comminfo->rtr_send=malloc(IB_RTR_SIZE*sizeof(IB_Peer_info_tag)); - if(comminfo->rtr_send == NULL) { printf("malloc() error\n"); return NULL; } - for(i=0; irtr_send[i].tag=-1; - /* allocate rtr queue */ - comminfo->rtr=malloc(p*sizeof(IB_Peer_info_tag*)); - if(comminfo->rtr== NULL) { printf("malloc() error\n"); return NULL; } - /* allocate rtr free queue */ - comminfo->rtr_peer_free=malloc(p*sizeof(int*)); - if(comminfo->rtr_peer_free == NULL) { printf("malloc() error\n"); return NULL; } - /* allocate rtr info */ - comminfo->rtr_info=malloc(p*sizeof(IB_Peer_info)); - if(comminfo->rtr_info == NULL) { printf("malloc() error\n"); return NULL; } - /* allocate rtr l_key */ - comminfo->rtr_l_key=malloc(p*sizeof(VAPI_lkey_t)); - if(comminfo->rtr_l_key == NULL) { printf("malloc() error\n"); return NULL; } - /* allocate rtr memory region handle */ - comminfo->rtr_mr_hndl_p=malloc(p*sizeof(VAPI_mr_hndl_t)); - if(comminfo->rtr_mr_hndl_p == NULL) { printf("malloc() error\n"); return NULL; } - - /* allocate eager send queue */ - comminfo->eager_send=malloc(IB_EAGER_SIZE*sizeof(IB_Eager_data)); - if(comminfo->eager_send == NULL) { printf("malloc() error\n"); return NULL; } - for(i=0; ieager_send[i].tag=-1; - /* allocate eager queue */ - comminfo->eager=malloc(p*sizeof(IB_Eager_data*)); - if(comminfo->eager== NULL) { printf("malloc() error\n"); return NULL; } - /* allocate eager free queue */ - comminfo->eager_peer_free=malloc(p*sizeof(int*)); - if(comminfo->eager_peer_free == NULL) { printf("malloc() error\n"); return NULL; } - /* allocate eager info */ - comminfo->eager_info=malloc(p*sizeof(IB_Peer_info)); - if(comminfo->eager_info == NULL) { printf("malloc() error\n"); return NULL; } - /* allocate eager l_key */ - comminfo->eager_l_key=malloc(p*sizeof(VAPI_lkey_t)); - if(comminfo->eager_l_key == NULL) { printf("malloc() error\n"); return NULL; } - /* allocate eager memory region handle */ - comminfo->eager_mr_hndl_p=malloc(p*sizeof(VAPI_mr_hndl_t)); - if(comminfo->eager_mr_hndl_p == NULL) { printf("malloc() error\n"); return NULL; } - - /* allocate eager free info */ - comminfo->eager_free_info=malloc(p*sizeof(IB_Peer_info)); - if(comminfo->eager_free_info == NULL) { printf("malloc() error\n"); return NULL; } - /* allocate eager free memory region handle */ - comminfo->eager_free_mr_hndl_p=malloc(p*sizeof(VAPI_mr_hndl_t)); - if(comminfo->eager_free_mr_hndl_p == NULL) { printf("malloc() error\n"); return NULL; } - - /* allocate a2abuf1 */ - a2abuf1=malloc(p*sizeof(IB_Peer_info)); - if(a2abuf1 == NULL) { printf("malloc() error\n"); return NULL; } - /* allocate a2abuf2 */ - a2abuf2=malloc(p*sizeof(IB_Peer_info)); - if(a2abuf2 == NULL) { printf("malloc() error\n"); return NULL; } - /* allocate a2abuf3 */ - a2abuf3=malloc(p*sizeof(IB_Peer_info)); - if(a2abuf3 == NULL) { printf("malloc() error\n"); return NULL; } - - for(i = 0; i < p; i++) { - if(i == rank) continue; - res = VAPI_create_cq(IB_Hca_info.hca_hndl, 100000, &comminfo->sr_cq_hndl_arr[i], &num_of_entries_p ); - IB_stat( res, "VAPI_create_cq()" ); - res = VAPI_create_cq(IB_Hca_info.hca_hndl, 100000, &comminfo->rr_cq_hndl_arr[i], &num_of_entries_p ); - IB_stat( res, "VAPI_create_cq()" ); - - res = IB_Create_qp(rank, i, &IB_Hca_info.hca_hndl, &(comminfo->sr_cq_hndl_arr[i]), &(comminfo->rr_cq_hndl_arr[i]), &(comminfo->qp_hndl_arr[i]), &IB_Hca_info.pd_hndl, comm ); - //printf("[%i] rank %i has sr_cq: %i and rr_cq: %i and qp_hndl: %i\n", rank, i, (int)comminfo->sr_cq_hndl_arr[i], (int)comminfo->rr_cq_hndl_arr[i], (int)comminfo->qp_hndl_arr[i]); - if(res != 0) { printf("Error in IB_Create_qp (%i)\n", res); return NULL; } - - /* allocate rtr element */ - comminfo->rtr[i] = malloc(sizeof(IB_Peer_info_tag)*IB_RTR_SIZE); - if(comminfo->rtr[i] == NULL) { printf("malloc() error\n"); return NULL; } - for(j=0; jrtr[i][j].tag = -1; - /* allocate rtr free queue */ - comminfo->rtr_peer_free[i]=malloc(sizeof(int)*IB_RTR_SIZE); - if(comminfo->rtr_peer_free[i] == NULL) { printf("malloc() error\n"); return NULL; } - /* set free rtr queue to free :) */ - for(j=0; jrtr_peer_free[i][j] = -1; - - /* allocate eager element */ - comminfo->eager[i] = malloc(sizeof(IB_Eager_data)*IB_EAGER_SIZE); - if(comminfo->eager[i] == NULL) { printf("malloc() error\n"); return NULL; } - for(j=0; jeager[i][j]), 0, sizeof(IB_Eager_data)); } - /* allocate free queue */ - comminfo->eager_peer_free[i]=malloc(sizeof(int)*IB_EAGER_SIZE); - if(comminfo->eager_peer_free[i] == NULL) { printf("malloc() error\n"); return NULL; } - /* set free queue to free :) */ - for(j=0; jeager_peer_free[i][j] = -1; - } - - for(i=0; irtr[i]); - req_mrw_p.size = sizeof(IB_Peer_info_tag)*IB_RTR_SIZE; - req_mrw_p.pd_hndl = IB_Hca_info.pd_hndl; - req_mrw_p.acl = VAPI_EN_LOCAL_WRITE | - VAPI_EN_REMOTE_WRITE | - VAPI_EN_REMOTE_READ; - - res = VAPI_register_mr( IB_Hca_info.hca_hndl, - &req_mrw_p, - &comminfo->rtr_mr_hndl_p[i], - &rep_mrw_p ); - IB_stat( res, "VAPI_register_mr() for RTR buffers" ); - - comminfo->rtr_l_key[i] = rep_mrw_p.l_key; - - a2abuf1[i].r_key = (unsigned long)rep_mrw_p.r_key; - a2abuf1[i].addr = (unsigned long)comminfo->rtr[i]; - //printf("[%i] my info - r_key: %lu addr for rank %i: %lu\n", rank, a2abuf[i].r_key, i, a2abuf[i].addr); - - /* register eager buffer */ - memset(&req_mrw_p, 0, sizeof(VAPI_mrw_t)); - req_mrw_p.type = VAPI_MR; - req_mrw_p.start = (VAPI_virt_addr_t)(comminfo->eager[i]); - req_mrw_p.size = sizeof(IB_Eager_data)*IB_EAGER_SIZE; - req_mrw_p.pd_hndl = IB_Hca_info.pd_hndl; - req_mrw_p.acl = VAPI_EN_LOCAL_WRITE | - VAPI_EN_REMOTE_WRITE | - VAPI_EN_REMOTE_READ; - - res = VAPI_register_mr( IB_Hca_info.hca_hndl, - &req_mrw_p, - &comminfo->eager_mr_hndl_p[i], - &rep_mrw_p ); - IB_stat( res, "VAPI_register_mr() for eager buffers" ); - - comminfo->eager_l_key[i] = rep_mrw_p.l_key; - - a2abuf2[i].r_key = (unsigned long)rep_mrw_p.r_key; - a2abuf2[i].addr = (unsigned long)comminfo->eager[i]; - - /* register eager free buffer */ - memset(&req_mrw_p, 0, sizeof(VAPI_mrw_t)); - req_mrw_p.type = VAPI_MR; - req_mrw_p.start = (VAPI_virt_addr_t)(comminfo->eager_peer_free[i]); - req_mrw_p.size = sizeof(int)*IB_EAGER_SIZE; - req_mrw_p.pd_hndl = IB_Hca_info.pd_hndl; - req_mrw_p.acl = VAPI_EN_LOCAL_WRITE | - VAPI_EN_REMOTE_WRITE | - VAPI_EN_REMOTE_READ; - - res = VAPI_register_mr( IB_Hca_info.hca_hndl, - &req_mrw_p, - &comminfo->eager_free_mr_hndl_p[i], - &rep_mrw_p ); - IB_stat( res, "VAPI_register_mr() for eager free buffer" ); - - a2abuf3[i].r_key = (unsigned long)rep_mrw_p.r_key; - a2abuf3[i].addr = (unsigned long)comminfo->eager_peer_free[i]; - //printf("[%i] my info - r_key: %lu addr for rank %i: %lu\n", rank, a2abuf1[i].r_key, i, a2abuf1[i].addr); - //printf("[%i] my info - r_key: %lu addr for rank %i: %lu\n", rank, a2abuf2[i].r_key, i, a2abuf2[i].addr); - //printf("[%i] my info - r_key: %lu addr for rank %i: %lu\n", rank, a2abuf3[i].r_key, i, a2abuf3[i].addr); - } - MPI_Alltoall(a2abuf1, 2, MPI_UNSIGNED_LONG, comminfo->rtr_info, 2, MPI_UNSIGNED_LONG, comm); - free(a2abuf1); - MPI_Alltoall(a2abuf2, 2, MPI_UNSIGNED_LONG, comminfo->eager_info, 2, MPI_UNSIGNED_LONG, comm); - free(a2abuf2); - MPI_Alltoall(a2abuf3, 2, MPI_UNSIGNED_LONG, comminfo->eager_free_info, 2, MPI_UNSIGNED_LONG, comm); - free(a2abuf3); - for(i=0; irtr_info[i].r_key, i, comminfo->rtr_info[i].addr); - //printf("[%i] eager rem info - r_key: %lu addr for me at node %i: %lu\n", rank, comminfo->eager_info[i].r_key, i, comminfo->eager_info[i].addr); - //printf("[%i] eager_free rem info - r_key: %lu addr for me at node %i: %lu\n", rank, comminfo->eager_free_info[i].r_key, i, comminfo->eager_free_info[i].addr); - } - - /* register rtr send buffer */ - memset(&req_mrw_p, 0, sizeof(VAPI_mrw_t)); - req_mrw_p.type = VAPI_MR; - req_mrw_p.start = (VAPI_virt_addr_t)(comminfo->rtr_send); - req_mrw_p.size = sizeof(IB_Peer_info_tag)*IB_RTR_SIZE; - req_mrw_p.pd_hndl = IB_Hca_info.pd_hndl; - req_mrw_p.acl = VAPI_EN_LOCAL_WRITE | - VAPI_EN_REMOTE_WRITE | - VAPI_EN_REMOTE_READ; - - res = VAPI_register_mr( IB_Hca_info.hca_hndl, - &req_mrw_p, - &comminfo->rtr_send_mr_hndl_p, - &rep_mrw_p ); - IB_stat( res, "VAPI_register_mr()" ); - - comminfo->rtr_send_l_key = rep_mrw_p.l_key; - - /* register eager send buffer */ - memset(&req_mrw_p, 0, sizeof(VAPI_mrw_t)); - req_mrw_p.type = VAPI_MR; - req_mrw_p.start = (VAPI_virt_addr_t)(comminfo->eager_send); - req_mrw_p.size = sizeof(IB_Eager_data)*IB_EAGER_SIZE; - req_mrw_p.pd_hndl = IB_Hca_info.pd_hndl; - req_mrw_p.acl = VAPI_EN_LOCAL_WRITE | - VAPI_EN_REMOTE_WRITE | - VAPI_EN_REMOTE_READ; - - res = VAPI_register_mr( IB_Hca_info.hca_hndl, - &req_mrw_p, - &comminfo->eager_send_mr_hndl_p, - &rep_mrw_p ); - IB_stat( res, "VAPI_register_mr()" ); - - comminfo->eager_send_l_key = rep_mrw_p.l_key; - - /* register empty send buffer */ - memset(&req_mrw_p, 0, sizeof(VAPI_mrw_t)); - req_mrw_p.type = VAPI_MR; - req_mrw_p.start = (VAPI_virt_addr_t)(&comminfo->empty); - req_mrw_p.size = sizeof(int32_t); - req_mrw_p.pd_hndl = IB_Hca_info.pd_hndl; - req_mrw_p.acl = VAPI_EN_LOCAL_WRITE | - VAPI_EN_REMOTE_WRITE | - VAPI_EN_REMOTE_READ; - - res = VAPI_register_mr( IB_Hca_info.hca_hndl, - &req_mrw_p, - &comminfo->eager_send_mr_hndl_p, - &rep_mrw_p ); - IB_stat( res, "VAPI_register_mr()" ); - - comminfo->empty_l_key = rep_mrw_p.l_key; - - comminfo->empty = -1; - - /* put the new attribute to the comm */ - res = MPI_Attr_put(comm, IB_Gkeyval, comminfo); - if((MPI_SUCCESS != res)) { printf("Error in MPI_Attr_put() (%i)\n", res); return NULL; } - } - - return comminfo; -} - -static __inline__ int IB_Register_mem(void *buf, int size, VAPI_mr_hndl_t *mr, VAPI_rkey_t *r_key, VAPI_lkey_t *l_key) { - VAPI_mrw_t req_mrw_p; - VAPI_mrw_t rep_mrw_p; /* responded memory region */ - int res, rank; - IB_Memlistel *memel, *newel, keyel; - - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - //printf("[%i] in IB_Register_mem\n", rank); - - keyel.buf = buf; - keyel.size = size; - memel = hb_tree_search(IB_Hca_info.memlist, &keyel); - if(memel != NULL) { - //printf("[%i] we found a region from %lu to %lu :-)\n", rank, (unsigned long)memel->buf,(unsigned long)(memel->buf+memel->size)); - if(r_key != NULL) *r_key = memel->r_key; - if(l_key != NULL) *l_key = memel->l_key; - return IB_OK; - } - - //printf("[%i] we did not find a region - registering %i bytes from addr %lu to %lu :-(\n", rank, size, (unsigned long)buf, (unsigned long)(buf+size)); - memset(&req_mrw_p, 0, sizeof(VAPI_mrw_t)); - req_mrw_p.type = VAPI_MR; - req_mrw_p.start = (VAPI_virt_addr_t)buf; - req_mrw_p.size = size; - req_mrw_p.pd_hndl = IB_Hca_info.pd_hndl; - req_mrw_p.acl = VAPI_EN_LOCAL_WRITE | - VAPI_EN_REMOTE_WRITE | - VAPI_EN_REMOTE_READ; - - res = VAPI_register_mr( IB_Hca_info.hca_hndl, - &req_mrw_p, - mr, - &rep_mrw_p ); - IB_stat( res, "VAPI_register_mr()" ); - - /* TODO: we should react to "Resources temporary unavailable" (EAGAIN) - * and free some MRs ... to continue :) */ - newel = malloc(sizeof(IB_Memlistel)); - newel->buf = buf; - newel->size = size; - newel->mr=mr; - newel->r_key=rep_mrw_p.r_key; - newel->l_key=rep_mrw_p.l_key; - res = hb_tree_insert (IB_Hca_info.memlist, newel, newel, 0); - if(res != 0) printf("[%i] error in dict_insert() (%i) while inserting region from %lu to %lu\n", rank, res, (unsigned long)newel->buf, (unsigned long)(newel->buf+newel->size)); - //if(res == 0) printf("[%i] inserted region from %lu to %lu\n", rank, (unsigned long)newel->buf, (unsigned long)newel->top); - - if(r_key != NULL) *r_key = rep_mrw_p.r_key; - if(l_key != NULL) *l_key = rep_mrw_p.l_key; - - return IB_OK; -} - -static __inline__ int IB_Do_send(IB_Req *req) { - int res; - - //printf("[%i] posting data SR from %p size: %i tag: %i to %i (addr: %lu, r_key: %u)\n", req->rank, (void*)req->sr_sg_lst.addr, req->sr_sg_lst.len, req->sr_desc.imm_data, req->peer, (unsigned long)req->sr_desc.remote_addr, req->sr_desc.r_key); - res = VAPI_post_sr( IB_Hca_info.hca_hndl, req->comminfo->qp_hndl_arr[req->peer], &req->sr_desc ); - IB_stat( res, "VAPI_post_sr()" ); - - req->status = SEND_POSTED_SR; - - return IB_OK; -} - -int IB_Isend(void *buf, int count, MPI_Datatype type, int dst, int tag, MPI_Comm comm, IB_Request *request) { - int res,sendentry,i; - MPI_Aint ext; - IB_Req *req; - char *flag; - VAPI_sg_lst_entry_t sr_sg_lst; /* the IB SG list */ - VAPI_sr_desc_t sr_desc; /* the IB SR descr. */ - - *request = malloc(sizeof(IB_Req)); - req = *request; - - if(!count) { - req->status = SEND_DONE; - return IB_OK; - } - - MPI_Type_extent(type, &ext); - - req->comminfo = IB_Comm_init(comm); - if(req->comminfo == NULL) { printf("Error in IB_Comm_init()\n"); return IB_OOR; } - - if(count*ext <= IB_EAGER_LIMIT) { - /* we send this as eager message */ - - /* find free eager send buffer */ - for(sendentry=0; sendentrycomminfo->eager_send[sendentry].tag == -1) break; - } - if(sendentry==IB_EAGER_SIZE) { printf("*** eager send list full - we should retry later but crash\n"); exit(1); } - //printf("[%i] found local eager sendbuffer entry %i\n", req->rank, sendentry); - req->sendel = sendentry; - - /* copy data into buffer */ - memcpy(&req->comminfo->eager_send[sendentry].buf, buf, count*ext); - /* set header */ - req->comminfo->eager_send[sendentry].size=count*ext; - req->comminfo->eager_send[sendentry].tag=tag; - /* set next byte after message to '1' */ - flag = (char*)&req->comminfo->eager_send[sendentry].buf + req->comminfo->eager_send[sendentry].size; - *flag = (char)1; - - /* prepare eager send request */ - sr_sg_lst.addr = (VAPI_virt_addr_t)(&req->comminfo->eager_send[sendentry]); - sr_sg_lst.len = sizeof(int32_t)+2*sizeof(int16_t)+sizeof(char)+req->comminfo->eager_send[sendentry].size; /* TODO: dangerous - datatype */ - sr_sg_lst.lkey = req->comminfo->eager_send_l_key; - sr_desc.id = (u_int64_t)req; - sr_desc.opcode = VAPI_RDMA_WRITE; - sr_desc.set_se = 0; - sr_desc.comp_type = VAPI_SIGNALED; - sr_desc.sg_lst_p = &sr_sg_lst; - sr_desc.sg_lst_len = 1; - sr_desc.imm_data = (u_int64_t)0; /* not used */ - sr_desc.fence = 0; - sr_desc.compare_add = 0; - sr_desc.r_key = (VAPI_rkey_t)(req->comminfo->eager_info[dst].r_key); - /* get offset in the receiver's rtr eager array */ - for(i=0; icomminfo->eager_peer_free[dst][i] == -1) break; - } - if(i==IB_EAGER_SIZE) { printf("******************* [%i] eager list on peer %i full - we should retry later but crash\n", req->rank, dst); } - req->comminfo->eager_peer_free[dst][i] = tag; - sr_desc.remote_addr = req->comminfo->eager_info[dst].addr+i*sizeof(IB_Eager_data); /* TODO: 64 Bit */ - req->comminfo->eager_send[sendentry].index=i; - //printf("[%i] found free eager slot index %i on node %i (addr: %lu) free)\n", req->rank, i, dst, sr_desc.remote_addr); - - res = VAPI_post_sr( IB_Hca_info.hca_hndl, req->comminfo->qp_hndl_arr[dst], &sr_desc ); - IB_stat( res, "VAPI_post_sr()" ); - //printf("[%i] post EAGER SR from %lu to node %i addr %lu rkey: %u, len: %o\n", req->rank, sr_sg_lst.addr, req->peer, (unsigned long)sr_desc.remote_addr, sr_desc.r_key, sr_sg_lst.len); - - - req->status = SEND_SENT_EAGER; - //while(IB_Test(&req) != IB_OK); - - } else { - /* we send this as rendezvous */ - - /* register memory region for send */ - //printf("[%i] register send memory %lu size: %i\n", req->rank, (unsigned long)buf, (int)(count*ext)); - res = IB_Register_mem(buf, count*ext, &req->mr_hndl_p, NULL, &req->sr_sg_lst.lkey); /* TODO: count*ext Danger for datatypes ... */ - - /* initialize sr_desc as far as we can (remote r_key and addr are - * missing set after we received RTR */ - req->sr_sg_lst.addr = (VAPI_virt_addr_t)buf; - req->sr_sg_lst.len = count*ext; /* TODO: count*ext Danger for datatypes ... */ - req->sr_desc.id = (u_int64_t)req; - req->sr_desc.opcode = VAPI_SEND_WITH_IMM; - req->sr_desc.set_se = 0; - req->sr_desc.comp_type = VAPI_SIGNALED; - req->sr_desc.sg_lst_p = &req->sr_sg_lst; - req->sr_desc.sg_lst_len = 1; - req->sr_desc.imm_data = (u_int64_t)tag; - req->sr_desc.fence = 0; - req->sr_desc.compare_add= 0; - - req->status = SEND_WAITING_RTR; - } - - //IB_Test(&req); - MPI_Comm_rank(comm, &req->rank); - MPI_Comm_size(comm, &req->p); - req->tag = tag; - req->peer= dst; - - return IB_OK; -} - -int IB_Irecv(void *buf, int count, MPI_Datatype type, int src, int tag, MPI_Comm comm, IB_Request *request) { - int res, i, sendentry; - MPI_Aint ext; - VAPI_sg_lst_entry_t sr_sg_lst; /* the IB SG list */ - VAPI_sr_desc_t sr_desc; /* the IB SR descr. */ - VAPI_sg_lst_entry_t rr_sg_lst; - VAPI_rr_desc_t rr_desc; - IB_Req *req; - IB_Taglstel *newel; - - *request = malloc(sizeof(IB_Req)); - req = *request; - - if(count == 0) { - req->status = RECV_DONE; - return IB_OK; - } - - MPI_Type_extent(type, &ext); - MPI_Comm_size(comm, &req->p); - MPI_Comm_rank(comm, &req->rank); - req->tag = tag; - req->peer= src; - req->buf=buf; - - req->comminfo = IB_Comm_init(comm); - if(req->comminfo == NULL) { printf("Error in IB_Comm_init()\n"); return IB_OOR; } - - if(count*ext <= IB_EAGER_LIMIT) { - /* do nothing, wait for eager message */ - req->status = RECV_WAITING_EAGER; - /* we take a look if we received it already */ - //IB_Test(&req); - } else { - /* find a new empty sendentry in the comminfo->send array which is - * pre-registered to send RTR messages from */ - for(sendentry=0; sendentrycomminfo->rtr_send[sendentry].tag == -1) break; - } - if(sendentry==IB_RTR_SIZE) { printf("*** rtr send list full - we should retry later but crash\n"); exit(1); } - /* fill selected send entry */ - req->comminfo->rtr_send[sendentry].tag = tag; - req->comminfo->rtr_send[sendentry].addr = (unsigned long)buf; - /* remember index in sendlist to free it fast after sending */ - req->sendel = sendentry; - - /* register memory region for recv */ - res = IB_Register_mem(buf, count*ext, &req->mr_hndl_p, (VAPI_rkey_t*)&req->comminfo->rtr_send[sendentry].r_key, &rr_sg_lst.lkey); /* TODO: count*ext Danger for datatypes ... */ - - /* prepare data receive request */ - rr_sg_lst.addr = (VAPI_virt_addr_t)buf; - rr_sg_lst.len = count*ext; - rr_desc.sg_lst_p = &rr_sg_lst; - rr_desc.sg_lst_len = 1; - rr_desc.id = (u_int64_t)req; - rr_desc.opcode = VAPI_RECEIVE; - rr_desc.comp_type = VAPI_SIGNALED; - - res = VAPI_post_rr( IB_Hca_info.hca_hndl, req->comminfo->qp_hndl_arr[src], &rr_desc ); - IB_stat( res, "VAPI_post_rr()" ); - - /* add the tag and the peer to the taglist */ - /* legacy old crappy taglist :) */ - //res = IB_Addtotaglst(comminfo, tag, req, src); - - /* new fancy taglist */ - newel = malloc(sizeof(IB_Taglstel)); - newel->tag=tag; - newel->req=req; - res = hb_tree_insert(req->comminfo->taglist[src], newel, newel, 0); - //printf("[%i] inserted request %lu with tag %i and src %i\n", req->rank, (unsigned long)req, tag, src); - - /* prepare RTR send request */ - sr_sg_lst.addr = (VAPI_virt_addr_t)(&req->comminfo->rtr_send[sendentry]); - sr_sg_lst.len = sizeof(IB_Peer_info_tag); - sr_sg_lst.lkey = req->comminfo->rtr_send_l_key; - sr_desc.id = (u_int64_t)req; - sr_desc.opcode = VAPI_RDMA_WRITE; - sr_desc.set_se = 0; - sr_desc.comp_type = VAPI_SIGNALED; - sr_desc.sg_lst_p = &sr_sg_lst; - sr_desc.sg_lst_len = 1; - sr_desc.imm_data = (u_int64_t)0; /* not used */ - sr_desc.fence = 0; - sr_desc.compare_add = 0; - sr_desc.r_key = (VAPI_rkey_t)(req->comminfo->rtr_info[src].r_key); - /* get offset in the receiver's rtr RTR array */ - for(i=0; icomminfo->rtr_peer_free[src][i] == -1) break; - } - if(i==IB_RTR_SIZE) { printf("*** unexpected list full - we should retry later but crash\n"); } - req->comminfo->rtr_peer_free[src][i] = tag; - sr_desc.remote_addr = req->comminfo->rtr_info[src].addr+i*sizeof(IB_Peer_info_tag); /* TODO: 64 Bit */ - //printf("[%i] found free RTR slot index %i on node %i (addr: %lu) free)\n", req->rank, i, src, sr_desc.remote_addr); - - /* post RTR request */ - res = VAPI_post_sr( IB_Hca_info.hca_hndl, req->comminfo->qp_hndl_arr[req->peer], &sr_desc ); - IB_stat( res, "VAPI_post_sr()" ); - //printf("[%i] post RTR SR from %lu to node %i addr %lu rkey: %u\n", req->rank, sr_sg_lst.addr, req->peer, (unsigned long)sr_desc.remote_addr, sr_desc.r_key); - - req->status = RECV_SENDING_RTR; - } - - //IB_Test(&req); - - return IB_OK; -} - -int IB_Test(IB_Request *request) { - int i, j, res; - VAPI_wc_desc_t comp_desc_p; /* work completion descriptor */ - IB_Req *tmpreq, *req; - IB_Taglstel *memel, keyel; - char *flag; - - req = *request; - - if((req->status == SEND_DONE) || (req->status == RECV_DONE) || (req->status == RECV_EAGER_DONE)) - return IB_OK; - - /* if I wait for RTR - search rtr array for my tag ... */ - if(req->status == SEND_WAITING_RTR) { - for(i=0; icomminfo->rtr[req->peer][i].tag == req->tag) { - //printf("[%i] found RTR from peer %i at addr %lu (tag: %i, r_key: %lu, addr: %lu)\n", req->rank, req->peer, (unsigned long)(&req->comminfo->rtr[req->peer][i]), req->comminfo->rtr[req->peer][i].tag, req->comminfo->rtr[req->peer][i].r_key, (unsigned long)req->comminfo->rtr[req->peer][i].addr); - req->sr_desc.r_key = (VAPI_rkey_t)req->comminfo->rtr[req->peer][i].r_key; - req->sr_desc.remote_addr = req->comminfo->rtr[req->peer][i].addr; /* TODO: 64 Bit */ - /* 'free' rtr element */ - req->comminfo->rtr[req->peer][i].tag = -1; - IB_Do_send(req); - break; - } - } - } - - /* I wait for an eager message */ - if(req->status == RECV_WAITING_EAGER) { - /* ok, poll all eager slots we have from the peer we wait for */ - for(i=0; icomminfo->eager[req->peer][i].tag == req->tag) { - VAPI_sg_lst_entry_t sr_sg_lst; /* the IB SG list */ - VAPI_sr_desc_t sr_desc; /* the IB SR descr. */ - int index; - - /* poll last byte until we can be sure that we have the *full* - * message in the buffer */ - flag = (char*)&req->comminfo->eager[req->peer][i].buf + req->comminfo->eager[req->peer][i].size; - while(*flag != (char)1); - //printf("[%i] found eager message from peer %i at addr %lu (tag: %i)\n", req->rank, req->peer, (unsigned long)(&req->comminfo->eager[req->peer][i]), (int)req->comminfo->eager[req->peer][i].tag); - /* copy message to recv buffer */ - memcpy(req->buf, &req->comminfo->eager[req->peer][i].buf, req->comminfo->eager[req->peer][i].size); - index = req->comminfo->eager[req->peer][i].index; - /* set the buffer to '0' to make flag-polling useful */ - memset((void*)(&req->comminfo->eager[req->peer][i]), 0, sizeof(int32_t)+2*sizeof(int16_t)+sizeof(char)+req->comminfo->eager[req->peer][i].size); - /* RDMA into the free-buffer on the sender to indicate that my - * buffer can be reused */ - - //printf("[%i] RDMA EAGER_RECVD (%i) to node %i in buffer %lu at index %i\n", req->rank, req->comminfo->empty, req->peer, (unsigned long)req->comminfo->eager_free_info[req->peer].addr, index); - /* prepare EAGER_RECVD send request */ - sr_sg_lst.addr = (VAPI_virt_addr_t)(&req->comminfo->empty); - sr_sg_lst.len = sizeof(int); - sr_sg_lst.lkey = req->comminfo->empty_l_key; - sr_desc.id = (u_int64_t)0; - sr_desc.opcode = VAPI_RDMA_WRITE; - sr_desc.set_se = 0; - sr_desc.comp_type = VAPI_SIGNALED; - sr_desc.sg_lst_p = &sr_sg_lst; - sr_desc.sg_lst_len = 1; - sr_desc.imm_data = (u_int64_t)0; /* not used */ - sr_desc.fence = 0; - sr_desc.compare_add = 0; - sr_desc.r_key = (VAPI_rkey_t)(req->comminfo->eager_free_info[req->peer].r_key); - sr_desc.remote_addr = req->comminfo->eager_free_info[req->peer].addr+sizeof(int)*index; /* TODO: 64 Bit */ - - /* post EAGER_RECVD request */ - res = VAPI_post_sr( IB_Hca_info.hca_hndl, req->comminfo->qp_hndl_arr[req->peer], &sr_desc ); - IB_stat( res, "VAPI_post_sr()" ); - /* mark receive as done */ - req->status = RECV_EAGER_DONE; - /* leave loop */ - break; - // return IB_OK; - // -> we need to poll CQs ... - } - } - // return IB_CONTINUE; - } - -//t=0-MPI_Wtime(); - /* poll all CQs of the comm related to req (we should probably only poll those where we wait for something */ - //for(i=0; ip; i++) { - //for(i=req->peer; i<=req->peer; i++) { - //if(i == req->rank) continue; - i=req->peer; - /************************************** SEND QUEUE handling ************************************/ - res = VAPI_poll_cq( IB_Hca_info.hca_hndl, req->comminfo->sr_cq_hndl_arr[i], &comp_desc_p ); - if((res != VAPI_EBUSY) && (res != VAPI_CQ_EMPTY)) { - //printf("[%i] have SR CQE from host %i\n", req->rank, i); - IB_CQ_stat( comp_desc_p, "VAPI_poll_cq(SR)" ); - /* id == 0 for eager recvd messages - we do not need to wait for them ... */ - if(comp_desc_p.id != 0) { - tmpreq = (IB_Req*)(comp_desc_p.id); - if(tmpreq->status == SEND_POSTED_SR) { - /* we sent the message and are ready */ - /* TODO: free *all* request resources here */ - tmpreq->status = SEND_DONE; - //printf("[%i] req %lu send to %i with tag %i is done ...\n", tmpreq->rank, (unsigned long)tmpreq, tmpreq->peer, tmpreq->tag); - } else if (tmpreq->status == SEND_SENT_EAGER) { - /* set rtr sendlist element to free */ - tmpreq->comminfo->eager_send[tmpreq->sendel].tag = -1; - tmpreq->status = SEND_DONE; - //printf("[%i] eager req %lu send to %i with tag %i is done ...\n", tmpreq->rank, (unsigned long)tmpreq, tmpreq->peer, tmpreq->tag); - } else if ((tmpreq->status == RECV_SENDING_RTR) || (tmpreq->status == RECV_DONE)) { - /* set rtr sendlist element to free */ - tmpreq->comminfo->rtr_send[tmpreq->sendel].tag = -1; - /* do not change DONE requests back :o) */ - if(tmpreq->status != RECV_DONE) tmpreq->status = RECV_SENT_RTR; - } else { - printf("[%i] req %lu unexpected status (%i) for send to %i (tag: %i) after poll sr_cq \n", tmpreq->rank, (unsigned long)tmpreq, tmpreq->status, tmpreq->peer, tmpreq->tag); - } - } - } - /************************************** RECEIVE QUEUE handling ************************************/ - res = VAPI_poll_cq( IB_Hca_info.hca_hndl, req->comminfo->rr_cq_hndl_arr[i], &comp_desc_p ); - if((res != VAPI_EBUSY) && (res != VAPI_CQ_EMPTY)) { - //printf("[%i] have RR CQE from host %i\n", req->rank, i); - IB_CQ_stat( comp_desc_p, "VAPI_poll_cq(RR)" ); - - /* we received real data - match it (tag, peer)*/ - keyel.tag=comp_desc_p.imm_data; - memel = hb_tree_search(req->comminfo->taglist[i], &keyel); - if(memel == NULL) { - printf("[%i] got unexpected packet with tag: %i, peer: %i\n", req->rank, comp_desc_p.imm_data, i); - printf(" this CANNOT happen!!!\n"); - return IB_CONTINUE; - } - /* delete element - TODO: this should actually be done together with - * the find ... we should extend libdict */ - res = hb_tree_remove(req->comminfo->taglist[i], &keyel, 0); - if(res != 0) { printf("error deleting tag element in hb_tree_remove()\n"); } - tmpreq = memel->req; - //printf("[%i] found request %lu for tag %i and src %i\n", tmpreq->rank, (unsigned long)tmpreq, comp_desc_p.imm_data, i); - if((tmpreq->status == RECV_SENDING_RTR) || (tmpreq->status == RECV_SENT_RTR)) { - /* TODO: free *all* req resources here */ - //printf("[%i] req %lu we received data message from host %i for tag %i-> done \n", req->rank, (unsigned long)tmpreq, req->peer, req->tag); - - /* get offset in the receiver's array */ - for(j=0; jcomminfo->rtr_peer_free[tmpreq->peer][j] == tmpreq->tag) break; - } - if(j==IB_RTR_SIZE) { printf("[%i] we did not find tag %i to delete - should not HAPPEN!!!\n", tmpreq->rank, tmpreq->tag); } - tmpreq->comminfo->rtr_peer_free[tmpreq->peer][j] = -1; - - tmpreq->status = RECV_DONE; - } else { - printf("[%i] req %lu (tag: %i) unexpected status (%i) after poll rr_cq \n", tmpreq->rank, (unsigned long)tmpreq, tmpreq->tag, tmpreq->status); - } - } - //} -//t+=MPI_Wtime(); -//printf("time: %lf\n", t*1e6); - - if((req->status == SEND_DONE) || (req->status == RECV_DONE) || (req->status == RECV_EAGER_DONE)) - return IB_OK; - else - return IB_CONTINUE; -} - -int IB_Wait(IB_Request *request) { - - while(IB_Test(request) != IB_OK) {}; - - return IB_OK; -} - -int IB_Testall(int count, IB_Request *requests, int *flag) { - int i, res; - - *flag = 1; - for(i=0; istatus != SEND_DONE) && (requests[i]->status != RECV_DONE) && (requests[i]->status != RECV_EAGER_DONE)) - { - res = IB_Test(&requests[i]); /* we shouldn't test ready requests ... */ - /* in case of error */ - if((res != IB_OK) && (res != IB_CONTINUE)) break; - /* we have at least one unfinished request ... */ - if(res == IB_CONTINUE) done = IB_CONTINUE; - } - } - } while(done == IB_CONTINUE); - - return done; -} - -static int IB_Create_qp( int rank, int remote, - VAPI_hca_hndl_t *hca_hndl_p, - VAPI_cq_hndl_t *sr_cq_hndl_p, - VAPI_cq_hndl_t *rr_cq_hndl_p, - VAPI_qp_hndl_t *qp_hndl_p, - VAPI_pd_hndl_t *pd_hndp_p, - MPI_Comm comm) { - // variables - VAPI_qp_init_attr_t qp_init_attr_p; // QP init attribs - VAPI_ret_t ret; // VAPI return value - VAPI_qp_prop_t qp_prop_p, rem_qp_prop_p; // QP properties - VAPI_hca_port_t hca_port_p, rem_hca_port_p; // port properties - VAPI_qp_attr_mask_t qp_attr_mask_p; // QP attribute mask - VAPI_qp_attr_t qp_attr_p; // QP attributes - VAPI_qp_cap_t qp_cap_p; // QP capabilities - - MPI_Status stat; // mpi status - - // get my LID - ret = VAPI_query_hca_port_prop( *hca_hndl_p, - (IB_port_t)1, - &hca_port_p ); - IB_stat( ret, "EVAPI_get_hca_hndl()" ); - - - // set QP Properties ... - qp_init_attr_p.sq_cq_hndl = *sr_cq_hndl_p; // cq associated with sr's - qp_init_attr_p.rq_cq_hndl = *rr_cq_hndl_p; // cq associated with rr's - qp_init_attr_p.cap.max_oust_wr_sq = 1000; // TODO guessed - qp_init_attr_p.cap.max_oust_wr_rq = 1000; // TODO guessed - qp_init_attr_p.cap.max_sg_size_sq = 1; // TODO guessed - qp_init_attr_p.cap.max_sg_size_rq = 1; // TODO guessed - qp_init_attr_p.cap.max_inline_data_sq = 1; // TODO guessed - // qp_init_attr_p.rdd_hndl = no rdd (Reliable Datagram Domain) used - qp_init_attr_p.sq_sig_type = VAPI_SIGNAL_ALL_WR; // signal all submitted WR's - qp_init_attr_p.rq_sig_type = VAPI_SIGNAL_ALL_WR; // signal all submitted WR's - qp_init_attr_p.pd_hndl = *pd_hndp_p; // the PD - // QP Transport Service Type - qp_init_attr_p.ts_type = VAPI_TS_RC; - - // create QP - ret = VAPI_create_qp( *hca_hndl_p, - &qp_init_attr_p, - qp_hndl_p, - &qp_prop_p ); - IB_stat( ret, "VAPI_create_qp()" ); - - MPI_Send(&hca_port_p.lid, 1, MPI_INT, remote, 0, comm); - MPI_Recv(&rem_hca_port_p.lid, 1, MPI_INT, remote, 0, comm, &stat); - MPI_Send(&qp_prop_p.qp_num, 1, MPI_INT, remote, 0, comm); - MPI_Recv(&rem_qp_prop_p.qp_num, 1, MPI_INT, remote, 0, comm, &stat); - - //fprintf(stderr, "[INFO] (lid:qp) - %x:%u -> %x:%u\n", hca_port_p.lid, qp_prop_p.qp_num, rem_hca_port_p.lid, rem_qp_prop_p.qp_num); - - // see page 222 for details ... - /* Transition RST to INIT */ - QP_ATTR_MASK_CLR_ALL(qp_attr_mask_p); - // QP State - QP_ATTR_MASK_SET( qp_attr_mask_p, QP_ATTR_QP_STATE); - qp_attr_p.qp_state = VAPI_INIT; - - // partition key index (see page 319) - QP_ATTR_MASK_SET( qp_attr_mask_p, QP_ATTR_PKEY_IX); - qp_attr_p.pkey_ix = 0; // first partition key - - // queue key -> only for datagram (RD, UD) TODO set this! - // QP_ATTR_MASK_SET( qp_attr_mask_p, QP_ATTR_QKEY); - // qp_attr_p.qkey = 0; - - // enable/disable RDMA R / Atomic -> allow all :) - QP_ATTR_MASK_SET( qp_attr_mask_p, QP_ATTR_REMOTE_ATOMIC_FLAGS); - qp_attr_p.remote_atomic_flags = VAPI_EN_REM_WRITE; - - // physical port - QP_ATTR_MASK_SET( qp_attr_mask_p, QP_ATTR_PORT); - qp_attr_p.port = 1; // TODO should not be static! - - ret = VAPI_modify_qp( *hca_hndl_p, - *qp_hndl_p, - &qp_attr_p, - &qp_attr_mask_p, - &qp_cap_p ); - IB_stat( ret, "VAPI_modify_qp() (RST->INIT)" ); - - // Transition INIT to RTR - QP_ATTR_MASK_CLR_ALL(qp_attr_mask_p); - - // new state - QP_ATTR_MASK_SET( qp_attr_mask_p, QP_ATTR_QP_STATE); - qp_attr_p.qp_state = VAPI_RTR; - - // remote node address vector - QP_ATTR_MASK_SET( qp_attr_mask_p, QP_ATTR_AV); - qp_attr_p.av.sl = 0; // TODO guessed - qp_attr_p.av.dlid = rem_hca_port_p.lid; // partners lid - qp_attr_p.av.src_path_bits = 0; // TODO guessed - qp_attr_p.av.static_rate = 0; // TODO guessed - qp_attr_p.av.grh_flag = 0; // non set ... - qp_attr_p.av.traffic_class = 0; // TODO guessed - qp_attr_p.av.hop_limit = 255; // should be ok - only global routing - qp_attr_p.av.flow_label = 0; // only global routing - qp_attr_p.av.sgid_index = 0; // only global routing - qp_attr_p.av.port = 1; // TODO guessed - - // packet sequence number - QP_ATTR_MASK_SET(qp_attr_mask_p, QP_ATTR_RQ_PSN); - qp_attr_p.rq_psn = 0; - - // number of responder resources for RDMA R + Atomic - QP_ATTR_MASK_SET(qp_attr_mask_p, QP_ATTR_QP_OUS_RD_ATOM); - qp_attr_p.qp_ous_rd_atom = 5; //TODO tune here? - - // minimum rnr nak timer - QP_ATTR_MASK_SET(qp_attr_mask_p, QP_ATTR_MIN_RNR_TIMER); - qp_attr_p.min_rnr_timer = 1; // TODO guessed - - // dest QP number - QP_ATTR_MASK_SET(qp_attr_mask_p, QP_ATTR_DEST_QP_NUM); - qp_attr_p.dest_qp_num = rem_qp_prop_p.qp_num; // partners qp num - - QP_ATTR_MASK_SET(qp_attr_mask_p, QP_ATTR_PATH_MTU); - qp_attr_p.path_mtu = MTU1024; // TODO tune here - - ret = VAPI_modify_qp( *hca_hndl_p, - *qp_hndl_p, - &qp_attr_p, - &qp_attr_mask_p, - &qp_cap_p ); - IB_stat( ret, "VAPI_modify_qp() (INIT->RTR)" ); - - - // Transition RTR to RTS - QP_ATTR_MASK_CLR_ALL(qp_attr_mask_p); - // new state - QP_ATTR_MASK_SET( qp_attr_mask_p, QP_ATTR_QP_STATE); - qp_attr_p.qp_state = VAPI_RTS; - - // SQ sequence number - QP_ATTR_MASK_SET(qp_attr_mask_p, QP_ATTR_SQ_PSN); - qp_attr_p.sq_psn = 0; - - // local ACK timeout - QP_ATTR_MASK_SET(qp_attr_mask_p, QP_ATTR_TIMEOUT); - qp_attr_p.timeout = 10; // TODO tune here - - // retry count - QP_ATTR_MASK_SET(qp_attr_mask_p, QP_ATTR_RNR_RETRY); - qp_attr_p.rnr_retry = 255; // increased due to VAPI_RETRY_EXC_ERR if 30 nodes fire 0.5MB at one :) -> maximum :) - - // number of outstanding RDMA R / atomic at destination - QP_ATTR_MASK_SET(qp_attr_mask_p, QP_ATTR_OUS_DST_RD_ATOM); - qp_attr_p.ous_dst_rd_atom = 10; // TODO tune here - - // retry count - QP_ATTR_MASK_SET(qp_attr_mask_p, QP_ATTR_RETRY_COUNT); - qp_attr_p.retry_count = 255; // guessed - - ret = VAPI_modify_qp( *hca_hndl_p, - *qp_hndl_p, - &qp_attr_p, - &qp_attr_mask_p, - &qp_cap_p ); - IB_stat( ret, "VAPI_modify_qp() (RTR->RTS)" ); - - return IB_OK; -} - -static __inline__ int IB_Memlist_compare_entries(IB_Memlistel *a, IB_Memlistel *b,void *param) { - - /* two memory regions are defined as equal if they have some common - * memory - more is not possible, because we have to ensure - * reflexibility (a=b includes b=a) */ - - if( (a->buf == b->buf) && (a->size == b->size) ) { - return 0; - } - if ( (a->buf < b->buf)) { - return -1; - } - return +1; -} - -static __inline__ void IB_Memlist_delete_key(IB_Memlistel *k) { - /* do nothing because the key and the data element are identical :-) - * both (the single one :) is freed in IB_Memlist_memlist_delete() */ -} - -static __inline__ void IB_Memlist_memlist_delete(IB_Memlistel *entry) { - /* free entry and deregister MR here ... */ -} - -static __inline__ int IB_Taglist_compare_entries(IB_Taglstel *a, IB_Taglstel *b,void *param) { - - if( a->tag == b->tag ) { - return 0; - } - if( a->tag < b->tag ) { - return -1; - } - return +1; -} - -static __inline__ void IB_Taglist_delete_key(IB_Taglstel *k) { - /* do nothing because the key and the data element are identical :-) - * both (the single one :) is freed in IB_Memlist_memlist_delete() */ -} - -static __inline__ void IB_Taglist_delete(IB_Taglstel *entry) { - /* free taglistentry */ - free(entry); -} diff --git a/ompi/mca/coll/libnbc/ib.h b/ompi/mca/coll/libnbc/ib.h deleted file mode 100644 index 7702545a43..0000000000 --- a/ompi/mca/coll/libnbc/ib.h +++ /dev/null @@ -1,119 +0,0 @@ -#ifndef __IB_H__ -#define __IB_H__ - -#include -#include -#include -#include -#include -#include -#include - -#include "dict.h" - -#define IB_OK 0 -#define IB_OOR 1 -#define IB_CONTINUE 2 - -#define IB_RTR_SIZE 320 /* number of RTR buffers per peer */ -#define IB_EAGER_SIZE 160 /* number of eager buffers per peer */ -#define IB_EAGER_LIMIT 8247 /* equals 8192 + 64 - 9 (tag, index, size, flag) to ensure 64 byte alignment */ - -typedef struct { - u_int64_t r_key; /* r_key of peer - TODO: should be u_int32_t ...*/ - u_int64_t addr; /* addr of peer - TODO: could be u_int32_t ... */ -} IB_Peer_info; - -typedef struct { - u_int64_t r_key; /* r_key of peer - TODO: should be u_int32_t ... */ - u_int64_t addr; /* addr of peer - TODO: could be u_int32_t ... */ - int32_t tag; /* tag has to be at the end (we do not send with immediate, so tag indicates receive -> could be dangerous) */ -} IB_Peer_info_tag; - -typedef struct { - VAPI_mr_hndl_t mr_hndl_p; /* memroy region handle - should really be a pointer to a memlist-element ... */ - VAPI_sg_lst_entry_t sr_sg_lst; /* the IB SG list */ - VAPI_sr_desc_t sr_desc; /* the IB SR descr. */ - int p; /* how many procs (RCQ, SCQ, QP) are in this comm */ - struct struct_IB_Comminfo *comminfo; /* the communicator info struct */ - int tag; /* our tag */ - int rank; /* our rank */ - int peer; /* the peer (dst for send, src for recv */ - void *buf; /* we need the buf for eager messages on the receiver side */ - int sendel; /* the element in comminfo.send[element] which is use by this request to send RTR or eager messages - we want to free it after sending RTR/eager */ - enum {FREE, SEND_WAITING_RTR, SEND_POSTED_SR, SEND_SENT_EAGER, RECV_SENDING_RTR, RECV_SENT_RTR, RECV_WAITING_EAGER, SEND_DONE, RECV_DONE, RECV_EAGER_DONE} status; /* this indicates the operation (send,recv) and the status of this op */ -} IB_Req; - -typedef IB_Req* IB_Request; - -struct struct_IB_Taglstel { - int tag; /* the tag -> key element */ - IB_Req *req; /* the request having this tag */ -}; -typedef struct struct_IB_Taglstel IB_Taglstel; - -struct struct_IB_Memlistel { - void *buf; - int size; - VAPI_mr_hndl_t *mr; - VAPI_rkey_t r_key; - VAPI_lkey_t l_key; -}; -typedef struct struct_IB_Memlistel IB_Memlistel; - -typedef struct { - int16_t index; /* the index in my free-array on the sender side - I should RDMA a -1 into this array to indicate the receiption */ - int16_t size; /* the actual size of the message, after this size follows a flag (single byte) to poll on receiption */ - int32_t tag; /* the message tag */ - int8_t buf[IB_EAGER_LIMIT]; /* the data buffer - should be chosen that the whole structure size is 64 byte aligned */ - int8_t flag; /* the flag to poll for completion - only if buffer is full */ -} IB_Eager_data; - -struct struct_IB_Comminfo { - VAPI_qp_hndl_t *qp_hndl_arr; /* QPs to all ranks in this comm */ - VAPI_cq_hndl_t *sr_cq_hndl_arr; /* SR CQs for all ranks in this comm */ - VAPI_cq_hndl_t *rr_cq_hndl_arr; /* RR CQs for all ranks in this comm */ - - /* the old crappy linear taglist */ - IB_Taglstel *taglisthead; /* the comm specific taglist */ - IB_Taglstel *taglistend; /* the end pointer of the taglist */ - - hb_tree **taglist; /* the new fancy AVL tree taglists - one for each peer (2-d matching is not possible :-(, so the peer-dimension is like a hash-table O(1) :) ) */ - - IB_Peer_info_tag *rtr_send; /* send queue for me (only for RTR) - IB_RTR_SIZE elements */ - VAPI_mr_hndl_t rtr_send_mr_hndl_p; /* memory region handle - needed to free MR ... */ - VAPI_lkey_t rtr_send_l_key; /* l_key for send */ - - VAPI_mr_hndl_t *rtr_mr_hndl_p; /* memory region handles per proc - needed to free MR ... */ - IB_Peer_info *rtr_info; /* rtr r_key, addr for each host */ - VAPI_lkey_t *rtr_l_key; /* l_key for rtr region */ - volatile IB_Peer_info_tag **rtr; /* rtr queue for each host - IB_RTR_SIZE elements */ - int **rtr_peer_free; /* indicates which elements are free at the specific peer (-1 means free, > -1 means filled with tag x */ - - IB_Eager_data *eager_send; /* eager send queue for me (only for eager) - IB_EAGER_SIZE elements */ - VAPI_mr_hndl_t eager_send_mr_hndl_p; /* memory region handle - needed to free MR ... */ - VAPI_lkey_t eager_send_l_key; /* l_key for eager send */ - - VAPI_mr_hndl_t *eager_mr_hndl_p; /* memory region handles per peer - needed to free MR ... */ - IB_Peer_info *eager_info; /* rtr r_key, addr for each host */ - VAPI_lkey_t *eager_l_key; /* l_key for rtr region per peer */ - volatile IB_Eager_data **eager; /* rtr queue for each host - IB_RTR_SIZE elements */ - int **eager_peer_free; /* indicates which elements are free at the specific peer (-1 means free, > -1 means filled with tag x */ - - int empty; /* this is just a buffer to RDMA into eager_peer_free on the sender */ - VAPI_mr_hndl_t *eager_free_mr_hndl_p; /* memory region handles per peer - needed to free MR ... */ - IB_Peer_info *eager_free_info; /* rtr r_key, addr for each host to RDMA EAGER_DONE */ - VAPI_mr_hndl_t empty_mr_hndl_p; /* memory region handle - needed to free MR ... */ - VAPI_lkey_t empty_l_key; /* l_key for empty int */ -}; -typedef struct struct_IB_Comminfo IB_Comminfo; - -/* function prototypes ... */ -int IB_Testall(int count, IB_Request *requests, int *flag); -int IB_Waitall(int count, IB_Request *requests); -int IB_Wait(IB_Request *req); -int IB_Test(IB_Request *req); -int IB_Isend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm mycomm, IB_Request *request); -int IB_Irecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm mycomm, IB_Request *request); - -#endif diff --git a/ompi/mca/coll/libnbc/ib_main.c b/ompi/mca/coll/libnbc/ib_main.c deleted file mode 100644 index 8d004fcc5f..0000000000 --- a/ompi/mca/coll/libnbc/ib_main.c +++ /dev/null @@ -1,104 +0,0 @@ -#define IB -#include -#include "ib.h" - -int main(int argc, char **argv) { - int rank, res, size, i, loops, j, tag; - IB_Request req; - double t1=0, t2=0, t3=0, t4=0, t5, t6, t7; - MPI_Request mpireq; - void *buf2; - - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - size = 1; - loops= 50; - - buf2 = malloc(size); - if(buf2 == NULL) printf("malloc() error\n"); - - if(rank == 0) { - res = IB_Isend(buf2, size, MPI_BYTE, 1, 1, MPI_COMM_WORLD, &req); - if(res) printf("Error in IB_Send (%i) \n", res); - res = IB_Wait(&req); - res = IB_Irecv(buf2, size, MPI_BYTE, 1, 1, MPI_COMM_WORLD, &req); - if(res) printf("Error in IB_Recv (%i) \n", res); - res = IB_Wait(&req); - } else { - res = IB_Irecv(buf2, size, MPI_BYTE, 0, 1, MPI_COMM_WORLD, &req); - if(res) printf("Error in IB_Recv (%i)\n", res); - res = IB_Wait(&req); - res = IB_Isend(buf2, size, MPI_BYTE, 0, 1, MPI_COMM_WORLD, &req); - if(res) printf("Error in IB_Send (%i) \n", res); - res = IB_Wait(&req); - } - - printf("[%i] MEASUREMENT\n", rank); - t1=t2=t3=t4=t5=t6=0; - - - - for(j=1; j - -int main(int argc, char **argv) { - NBC_Handle handle1, handle2; - int rank, i,j, res, p; - int *buf1, *buf2, *ptr; - - MPI_Init(&argc, &argv); - - /* shut up compiler */ - handle1 = handle2; - handle2 = handle1; - - res = MPI_Comm_size(MPI_COMM_WORLD, &p); - res = MPI_Comm_rank(MPI_COMM_WORLD, &rank); - if(res != MPI_SUCCESS) printf("Error in MPI_Comm_rank!\n"); - - buf1=malloc(2*p*sizeof(int)); - buf2=malloc(2*p*sizeof(int)); - - for (i=0; i<1; i++) { - for(j=0;jschedule);*/ - /* the schedule pointer itself is also malloc'd */ - /*free(handle->schedule);*/ - /* if the nbc_I attached some data */ - if(NULL != handle->tmpbuf) free(handle->tmpbuf); - - return NBC_OK; -} - -/* this function puts a send into the schedule */ -int NBC_Sched_send(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule) { - int size; - NBC_Args_send* send_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = realloc(*schedule, size+sizeof(NBC_Args_send)+sizeof(NBC_Fn_type)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - - /* adjust the function type */ - *(NBC_Fn_type*)((char*)*schedule+size)=SEND; - - /* store the passed arguments */ - send_args = (NBC_Args_send*)((char*)*schedule+size+sizeof(NBC_Fn_type)); - send_args->buf=buf; - send_args->tmpbuf=tmpbuf; - send_args->count=count; - send_args->datatype=datatype; - send_args->dest=dest; - - /* increase number of elements in schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding send - ends at byte %i\n", (int)(size+sizeof(NBC_Args_send)+sizeof(NBC_Fn_type))); - - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Args_send)+sizeof(NBC_Fn_type)); - - return NBC_OK; -} - -/* this function puts a receive into the schedule */ -int NBC_Sched_recv(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule) { - int size; - NBC_Args_recv* recv_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = realloc(*schedule, size+sizeof(NBC_Args_recv)+sizeof(NBC_Fn_type)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - - /* adjust the function type */ - *(NBC_Fn_type*)((char*)*schedule+size)=RECV; - - /* store the passed arguments */ - recv_args=(NBC_Args_recv*)((char*)*schedule+size+sizeof(NBC_Fn_type)); - recv_args->buf=buf; - recv_args->tmpbuf=tmpbuf; - recv_args->count=count; - recv_args->datatype=datatype; - recv_args->source=source; - - /* increase number of elements in schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding receive - ends at byte %i\n", (int)(size+sizeof(NBC_Args_recv)+sizeof(NBC_Fn_type))); - - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Args_recv)+sizeof(NBC_Fn_type)); - - return NBC_OK; -} - -/* this function puts an operation into the schedule */ -int NBC_Sched_op(void *buf3, char tmpbuf3, void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule) { - int size; - NBC_Args_op* op_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = realloc(*schedule, size+sizeof(NBC_Args_op)+sizeof(NBC_Fn_type)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - - /* adjust the function type */ - *(NBC_Fn_type*)((char*)*schedule+size)=OP; - - /* store the passed arguments */ - op_args=(NBC_Args_op*)((char*)*schedule+size+sizeof(NBC_Fn_type)); - op_args->buf1=buf1; - op_args->buf2=buf2; - op_args->buf3=buf3; - op_args->tmpbuf1=tmpbuf1; - op_args->tmpbuf2=tmpbuf2; - op_args->tmpbuf3=tmpbuf3; - op_args->count=count; - op_args->op=op; - op_args->datatype=datatype; - - /* increase number of elements in schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding op - ends at byte %i\n", (int)(size+sizeof(NBC_Args_op)+sizeof(NBC_Fn_type))); - - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Args_op)+sizeof(NBC_Fn_type)); - - return NBC_OK; -} - -/* this function puts a copy into the schedule */ -int NBC_Sched_copy(void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, MPI_Datatype tgttype, NBC_Schedule *schedule) { - int size; - NBC_Args_copy* copy_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = realloc(*schedule, size+sizeof(NBC_Args_copy)+sizeof(NBC_Fn_type)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - - /* adjust the function type */ - *(NBC_Fn_type*)((char*)*schedule+size)=COPY; - - /* store the passed arguments */ - copy_args = (NBC_Args_copy*)((char*)*schedule+size+sizeof(NBC_Fn_type)); - copy_args->src=src; - copy_args->tmpsrc=tmpsrc; - copy_args->srccount=srccount; - copy_args->srctype=srctype; - copy_args->tgt=tgt; - copy_args->tmptgt=tmptgt; - copy_args->tgtcount=tgtcount; - copy_args->tgttype=tgttype; - - /* increase number of elements in schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding copy - ends at byte %i\n", (int)(size+sizeof(NBC_Args_copy)+sizeof(NBC_Fn_type))); - - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Args_copy)+sizeof(NBC_Fn_type)); - - return NBC_OK; -} - -/* this function puts a unpack into the schedule */ -int NBC_Sched_unpack(void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, NBC_Schedule *schedule) { - int size; - NBC_Args_unpack* unpack_args; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule is %i bytes\n", size);*/ - *schedule = realloc(*schedule, size+sizeof(NBC_Args_unpack)+sizeof(NBC_Fn_type)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - - /* adjust the function type */ - *(NBC_Fn_type*)((char*)*schedule+size)=UNPACK; - - /* store the passed arguments */ - unpack_args = (NBC_Args_unpack*)((char*)*schedule+size+sizeof(NBC_Fn_type)); - unpack_args->inbuf=inbuf; - unpack_args->tmpinbuf=tmpinbuf; - unpack_args->count=count; - unpack_args->datatype=datatype; - unpack_args->outbuf=outbuf; - unpack_args->tmpoutbuf=tmpoutbuf; - - /* increase number of elements in schedule */ - NBC_INC_NUM_ROUND(*schedule); - NBC_DEBUG(10, "adding unpack - ends at byte %i\n", (int)(size+sizeof(NBC_Args_unpack)+sizeof(NBC_Fn_type))); - - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(NBC_Args_unpack)+sizeof(NBC_Fn_type)); - - return NBC_OK; -} - -/* this function ends a round of a schedule */ -int NBC_Sched_barrier(NBC_Schedule *schedule) { - int size; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("round terminated at %i bytes\n", size);*/ - *schedule = realloc(*schedule, size+sizeof(char)+sizeof(int)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - - /* add the barrier char (1) because another round follows */ - *(char*)((char*)*schedule+size)=1; - - /* set round count elements = 0 for new round */ - *(int*)((char*)*schedule+size+sizeof(char))=0; - NBC_DEBUG(10, "ending round at byte %i\n", (int)(size+sizeof(char)+sizeof(int))); - - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(char)+sizeof(int)); - - return NBC_OK; -} - -/* this function ends a schedule */ -int NBC_Sched_commit(NBC_Schedule *schedule) { - int size; - - /* get size of actual schedule */ - NBC_GET_SIZE(*schedule, size); - /*printf("schedule terminated at %i bytes\n", size);*/ - *schedule = realloc(*schedule, size+sizeof(char)); - if(*schedule == NULL) { printf("Error in realloc()\n"); return NBC_OOR; } - - /* add the barrier char (0) because this is the last round */ - *(char*)((char*)*schedule+size)=0; - NBC_DEBUG(10, "closing schedule %p at byte %i\n", *schedule, (int)(size+sizeof(char))); - - /* increase size of schedule */ - NBC_INC_SIZE(*schedule, sizeof(char)); - - return NBC_OK; -} - -int NBC_Progress(NBC_Handle *handle) { - int flag, res; - long size; - char *delim; - - if((handle->req_count > 0) && (handle->req_array != NULL)) { - NBC_DEBUG(50, "NBC_Progress: testing for %i requests\n", handle->req_count); -#ifdef OMPI_COMPONENT - /*res = ompi_request_test_all(handle->req_count, handle->req_array, &flag, MPI_STATUSES_IGNORE);*/ - res = MPI_Testall(handle->req_count, handle->req_array, &flag, MPI_STATUS_IGNORE); - if(res != OMPI_SUCCESS) { printf("MPI Error in MPI_Testall() (%i)\n", res); return res; } -#endif -#ifdef MPI - res = MPI_Testall(handle->req_count, handle->req_array, &flag, MPI_STATUS_IGNORE); - if(res != MPI_SUCCESS) { printf("MPI Error in MPI_Testall() (%i)\n", res); return res; } -#endif -#ifdef IB - res = IB_Testall(handle->req_count, handle->req_array, &flag); - if(res != MPI_SUCCESS) { printf("MPI Error in MPI_Testall() (%i)\n", res); return res; } -#endif - } else { - flag = 1; /* we had no open requests -> proceed to next round */ - } - - /* a round is finished */ - if(flag) { - /* adjust delim to start of current round */ - NBC_DEBUG(10, "NBC_Progress: going in schedule %p to row-offset: %li\n", *handle->schedule, handle->row_offset); - delim = (char*)*handle->schedule + handle->row_offset; - NBC_DEBUG(10, "delim: %p\n", delim); - NBC_GET_ROUND_SIZE(delim, size); - NBC_DEBUG(10, "size: %li\n", size); - /* adjust delim to end of current round -> delimiter */ - delim = delim + size; - - if(handle->req_array != NULL) { - /* free request array */ - free(handle->req_array); - handle->req_array = NULL; - } - handle->req_count = 0; - - if(*delim == 0) { - /* this was the last round - we're done */ - NBC_DEBUG(5, "NBC_Progress last round finished - we're done\n"); - return NBC_OK; - } else { - NBC_DEBUG(5, "NBC_Progress round finished - goto next round\n"); - /* move delim to start of next round */ - delim = delim+1; - /* initializing handle for new virgin round */ - handle->row_offset = (long)delim - (long)*handle->schedule; - /* kick it off */ - res = NBC_Start_round(handle); - if(NBC_OK != res) { printf("Error in NBC_Start_round() (%i)\n", res); return res; } - } - } - - return NBC_CONTINUE; -} - -int NBC_Progress_block(NBC_Handle *handle) { - int res; - long size; - char *delim; - - do { - if((handle->req_count > 0) && (handle->req_array != NULL)) { - NBC_DEBUG(50, "NBC_Progress_block: waiting for %i requests\n", handle->req_count); -#ifdef OMPI_COMPONENT - /*res = ompi_request_wait_all(handle->req_count, handle->req_array, MPI_STATUSES_IGNORE);*/ - res = MPI_Waitall(handle->req_count, handle->req_array, MPI_STATUS_IGNORE); - if(res != OMPI_SUCCESS) { printf("MPI Error in MPI_Waitall() (%i)\n", res); return res; } -#endif -#ifdef MPI - res = MPI_Waitall(handle->req_count, handle->req_array, MPI_STATUS_IGNORE); - if(res != MPI_SUCCESS) { printf("MPI Error in MPI_Waitall() (%i)\n", res); return res; } -#endif -#ifdef IB - res = IB_Waitall(handle->req_count, handle->req_array); - if(res != NBC_OK) { printf("MPI Error in MPI_Waitall() (%i)\n", res); return res; } -#endif - } - - /* a round is finished - adjust delim to start of current round */ - NBC_DEBUG(10, "NBC_Progress_block: going in schedule %p to row-offset: %li\n", *handle->schedule, handle->row_offset); - delim = (char*)*handle->schedule + handle->row_offset; - NBC_DEBUG(10, "delim: %p\n", delim); - NBC_GET_ROUND_SIZE(delim, size); - NBC_DEBUG(10, "size: %li\n", size); - /* adjust delim to end of current round -> delimiter */ - delim = delim + size; - - if(handle->req_array != NULL) { - /* free request array */ - free(handle->req_array); - handle->req_array = NULL; - } - handle->req_count = 0; - - if(*delim == 0) { - /* this was the last round - we're done */ - NBC_DEBUG(5, "NBC_Progress_block: last round finished - we're done\n"); - break; - } else { - NBC_DEBUG(5, "NBC_Progress_block: round finished - goto next round\n"); - /* move delim to start of next round */ - delim = delim+1; - /* initializing handle for new virgin round */ - handle->row_offset = (long)delim - (long)*handle->schedule; - /* kick it off */ - res = NBC_Start_round(handle); - if(NBC_OK != res) { printf("Error in NBC_Start_round() (%i)\n", res); return res; } - } - } while(1); - - return NBC_OK; -} - -static __inline__ int NBC_Start_round(NBC_Handle *handle) { - int *numptr; /* number of operations */ - int i, res; - NBC_Fn_type *typeptr; - NBC_Args_send *sendargs; - NBC_Args_recv *recvargs; - NBC_Args_op *opargs; - NBC_Args_copy *copyargs; - NBC_Args_unpack *unpackargs; - NBC_Schedule myschedule; - void *buf1, *buf2, *buf3; - - /* get schedule address */ - myschedule = (NBC_Schedule*)((char*)*handle->schedule + handle->row_offset); - - numptr = (int*)myschedule; - NBC_DEBUG(10, "start_round round at address %p : posting %i operations\n", myschedule, *numptr); - - /* typeptr is increased by sizeof(int) bytes to point to type */ - typeptr = (NBC_Fn_type*)(numptr+1); - for (i=0; i<*numptr; i++) { - /* go sizeof op-data forward */ - switch(*typeptr) { - case SEND: - NBC_DEBUG(5," SEND (offset %li) ", (long)typeptr-(long)myschedule); - sendargs = (NBC_Args_send*)(typeptr+1); - NBC_DEBUG(5,"*buf: %p, count: %i, type: %lu, dest: %i, tag: %i)\n", sendargs->buf, sendargs->count, (unsigned long)sendargs->datatype, sendargs->dest, handle->tag); - typeptr = (NBC_Fn_type*)(((NBC_Args_send*)typeptr)+1); - /* get an additional request - TODO: req_count NOT thread safe */ - handle->req_count++; - /* get buffer */ - if(sendargs->tmpbuf) - buf1=(char*)handle->tmpbuf+(long)sendargs->buf; - else - buf1=sendargs->buf; -#ifdef OMPI_COMPONENT - handle->req_array = realloc(handle->req_array, (handle->req_count)*sizeof(ompi_request_t*)); - CHECK_NULL(handle->req_array); - /*res = MCA_PML_CALL(isend_init(buf1, sendargs->count, sendargs->datatype, sendargs->dest, handle->tag, MCA_PML_BASE_SEND_STANDARD, handle->mycomm, handle->req_array+handle->req_count-1)); - printf("MPI_Isend(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, sendargs->count, (unsigned long)sendargs->datatype, sendargs->dest, handle->tag, (unsigned long)handle->mycomm, res);*/ - res = MPI_Isend(buf1, sendargs->count, sendargs->datatype, sendargs->dest, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1); - if(OMPI_SUCCESS != res) { printf("Error in MPI_Isend(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, sendargs->count, (unsigned long)sendargs->datatype, sendargs->dest, handle->tag, (unsigned long)handle->mycomm, res); return res; } -#endif -#ifdef MPI - handle->req_array = realloc(handle->req_array, (handle->req_count)*sizeof(MPI_Request)); - CHECK_NULL(handle->req_array); - res = MPI_Isend(buf1, sendargs->count, sendargs->datatype, sendargs->dest, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1); - if(MPI_SUCCESS != res) { printf("Error in MPI_Isend(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, sendargs->count, (unsigned long)sendargs->datatype, sendargs->dest, handle->tag, (unsigned long)handle->mycomm, res); return res; } -#endif -#ifdef IB - handle->req_array = realloc(handle->req_array, (handle->req_count)*sizeof(IB_Request)); - CHECK_NULL(handle->req_array); - res = IB_Isend(buf1, sendargs->count, sendargs->datatype, sendargs->dest, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1); - if(NBC_OK != res) { printf("Error in IB_Isend() (%i)\n", res); return res; } -#endif - break; - case RECV: - NBC_DEBUG(5, " RECV (offset %li) ", (long)typeptr-(long)myschedule); - recvargs = (NBC_Args_recv*)(typeptr+1); - NBC_DEBUG(5, "*buf: %p, count: %i, type: %lu, source: %i, tag: %i)\n", recvargs->buf, recvargs->count, (unsigned long)recvargs->datatype, recvargs->source, handle->tag); - typeptr = (NBC_Fn_type*)(((NBC_Args_recv*)typeptr)+1); - /* get an additional request - TODO: req_count NOT thread safe */ - handle->req_count++; - /* get buffer */ - if(recvargs->tmpbuf) { - buf1=(char*)handle->tmpbuf+(long)recvargs->buf; - } else { - buf1=recvargs->buf; - } -#ifdef OMPI_COMPONENT - handle->req_array = realloc(handle->req_array, (handle->req_count)*sizeof(ompi_request_t*)); - CHECK_NULL(handle->req_array); - /*res = MCA_PML_CALL(irecv(buf1, recvargs->count, recvargs->datatype, recvargs->source, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1)); - printf("MPI_Irecv(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, recvargs->count, (unsigned long)recvargs->datatype, recvargs->source, handle->tag, (unsigned long)handle->mycomm, res); */ - res = MPI_Irecv(buf1, recvargs->count, recvargs->datatype, recvargs->source, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1); - if(OMPI_SUCCESS != res) { printf("Error in MPI_Irecv(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, recvargs->count, (unsigned long)recvargs->datatype, recvargs->source, handle->tag, (unsigned long)handle->mycomm, res); return res; } -#endif -#ifdef MPI - handle->req_array = realloc(handle->req_array, (handle->req_count)*sizeof(MPI_Request)); - CHECK_NULL(handle->req_array); - res = MPI_Irecv(buf1, recvargs->count, recvargs->datatype, recvargs->source, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1); - if(MPI_SUCCESS != res) { printf("Error in MPI_Irecv(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, recvargs->count, (unsigned long)recvargs->datatype, recvargs->source, handle->tag, (unsigned long)handle->mycomm, res); return res; } -#endif -#ifdef IB - handle->req_array = realloc(handle->req_array, (handle->req_count)*sizeof(IB_Request)); - CHECK_NULL(handle->req_array); - res = IB_Irecv(buf1, recvargs->count, recvargs->datatype, recvargs->source, handle->tag, handle->mycomm, handle->req_array+handle->req_count-1); - if(NBC_OK != res) { printf("Error in MPI_Irecv() (%i)\n", res); return res; } -#endif - break; - case OP: - NBC_DEBUG(5, " OP (offset %li) ", (long)typeptr-(long)myschedule); - opargs = (NBC_Args_op*)(typeptr+1); - NBC_DEBUG(5, "*buf1: %lu, buf2: %lu, count: %i, type: %lu)\n", (unsigned long)opargs->buf1, (unsigned long)opargs->buf2, opargs->count, (unsigned long)opargs->datatype); - typeptr = (NBC_Fn_type*)((NBC_Args_op*)typeptr+1); - /* get buffers */ - if(opargs->tmpbuf1) - buf1=(char*)handle->tmpbuf+(long)opargs->buf1; - else - buf1=opargs->buf1; - if(opargs->tmpbuf2) - buf2=(char*)handle->tmpbuf+(long)opargs->buf2; - else - buf2=opargs->buf2; - if(opargs->tmpbuf3) - buf3=(char*)handle->tmpbuf+(long)opargs->buf3; - else - buf3=opargs->buf3; - res = NBC_Operation(buf3, buf1, buf2, opargs->op, opargs->datatype, opargs->count); - if(res != NBC_OK) { printf("NBC_Operation() failed (code: %i)\n", res); return res; } - break; - case COPY: - NBC_DEBUG(5, " COPY (offset %li) ", (long)typeptr-(long)myschedule); - copyargs = (NBC_Args_copy*)(typeptr+1); - NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu, tgtcount: %i, tgttype: %lu)\n", (unsigned long)copyargs->src, copyargs->srccount, (unsigned long)copyargs->srctype, (unsigned long)copyargs->tgt, copyargs->tgtcount, (unsigned long)copyargs->tgttype); - typeptr = (NBC_Fn_type*)((NBC_Args_copy*)typeptr+1); - /* get buffers */ - if(copyargs->tmpsrc) - buf1=(char*)handle->tmpbuf+(long)copyargs->src; - else - buf1=copyargs->src; - if(copyargs->tmptgt) - buf2=(char*)handle->tmpbuf+(long)copyargs->tgt; - else - buf2=copyargs->tgt; - res = NBC_Copy(buf1, copyargs->srccount, copyargs->srctype, buf2, copyargs->tgtcount, copyargs->tgttype, handle->mycomm); - if(res != NBC_OK) { printf("NBC_Copy() failed (code: %i)\n", res); return res; } - break; - case UNPACK: - NBC_DEBUG(5, " UNPACK (offset %li) ", (long)typeptr-(long)myschedule); - unpackargs = (NBC_Args_unpack*)(typeptr+1); - NBC_DEBUG(5, "*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu\n", (unsigned long)unpackargs->inbuf, unpackargs->count, (unsigned long)unpackargs->datatype, (unsigned long)unpackargs->outbuf); - typeptr = (NBC_Fn_type*)((NBC_Args_unpack*)typeptr+1); - /* get buffers */ - if(unpackargs->tmpinbuf) - buf1=(char*)handle->tmpbuf+(long)unpackargs->inbuf; - else - buf1=unpackargs->outbuf; - if(unpackargs->tmpoutbuf) - buf2=(char*)handle->tmpbuf+(long)unpackargs->outbuf; - else - buf2=unpackargs->outbuf; - res = NBC_Unpack(buf1, unpackargs->count, unpackargs->datatype, buf2, handle->mycomm); - if(res != NBC_OK) { printf("NBC_Unpack() failed (code: %i)\n", res); return res; } - break; - default: - printf("NBC_Start_round: bad type %li at offset %li\n", (long)*typeptr, (long)typeptr-(long)myschedule); - return NBC_BAD_SCHED; - } - /* increase ptr by size of fn_type enum */ - typeptr = (NBC_Fn_type*)((NBC_Fn_type*)typeptr+1); - } - - /* check if we can make progress - not in the first round, this allows us to leave the - * initialization faster and to reach more overlap */ - if(handle->row_offset != 4) { - res = NBC_Progress(handle); - if((NBC_OK != res) && (NBC_CONTINUE != res)) { printf("Error in NBC_Progress() (%i)\n", res); return res; } - } - - return NBC_OK; -} - -__inline__ int NBC_Init_handle(NBC_Handle *handle, MPI_Comm comm) { - int res, flag; - NBC_Comminfo *comminfo; - - /* create a new state and return handle to it */ - handle->req_array = NULL; - handle->req_count = 0; - handle->comm = comm; - /* first int is the schedule size */ - handle->row_offset = sizeof(int); - - /******************** Do the tag and shadow comm administration ... ***************/ - -#ifdef OMPI_COMPONENT - /* the communicator is an ompi_communicator_t with a pointer to - * c_coll_selected_data where we have our comminfo struct - this was - * initialized during comm_query */ - { - ompi_communicator_t *ompicomm; - - ompicomm = (ompi_communicator_t*)comm; - comminfo = (NBC_Comminfo*)ompicomm->c_coll_selected_data; - flag = 1; - } -#else - /* otherwise we have to do the normal attribute stuff :-( */ - /* keyval is not initialized yet, we have to init it */ - if(MPI_KEYVAL_INVALID == gkeyval) { - res = MPI_Keyval_create(NBC_Key_copy, NBC_Key_delete, &(gkeyval), NULL); - if((MPI_SUCCESS != res)) { printf("Error in MPI_Keyval_create() (%i)\n", res); return res; } - } - - res = MPI_Attr_get(comm, gkeyval, &comminfo, &flag); - if((MPI_SUCCESS != res)) { printf("Error in MPI_Attr_get() (%i)\n", res); return res; } -#endif - if (flag) { - /* we found it */ - comminfo->tag++; - } else { - /* we have to create a new one */ - comminfo = NBC_Init_comm(comm); - if(comminfo == NULL) { printf("Error in NBC_Init_comm() %i\n", res); return NBC_OOR; } - } - handle->tag=comminfo->tag; - handle->mycomm=comminfo->mycomm; - /*printf("got comminfo: %lu tag: %i\n", comminfo, comminfo->tag);*/ - -#if 0 -/*#ifdef OMPI_COMPONENT*/ - /* we use negative tags for OMPI */ - if(handle->tag == -50) { - handle->tag=-32767; - comminfo->tag=-32767; - NBC_DEBUG(2,"resetting tags ...\n"); - } -#else -#endif - /* reset counter ... */ - if(handle->tag == 32767) { - handle->tag=1; - comminfo->tag=1; - NBC_DEBUG(2,"resetting tags ...\n"); - } -/*#endif*/ - - /******************** end of tag and shadow comm administration ... ***************/ - handle->comminfo = comminfo; - - NBC_DEBUG(3, "got tag %i\n", handle->tag); - - return NBC_OK; -} - -__inline__ NBC_Comminfo* NBC_Init_comm(MPI_Comm comm) { -#ifndef OMPI_COMPONENT - int res; -#endif - NBC_Comminfo *comminfo; - - comminfo = malloc(sizeof(NBC_Comminfo)); - if(comminfo == NULL) { printf("Error in malloc()\n"); return NULL; } - - -#ifdef OMPI_COMPONENT - comminfo->mycomm = comm; - /* set tag to 1 */ - /*comminfo->tag=-32767;*/ - comminfo->tag=1; -#else - /* set tag to 1 */ - comminfo->tag=1; - /* dup and save shadow communicator */ - res = MPI_Comm_dup(comm, &(comminfo->mycomm)); - if((MPI_SUCCESS != res)) { printf("Error in MPI_Comm_dup() (%i)\n", res); return NULL; } - NBC_DEBUG(1, "created a shadow communicator for %lu ... %lu\n", (unsigned long)comm, (unsigned long)comminfo->mycomm); -#endif - -#ifdef NBC_CACHE_SCHEDULE - /* initialize the NBC_ALLTOALL SchedCache tree */ - comminfo->NBC_Dict[NBC_ALLTOALL] = hb_tree_new((dict_cmp_func)NBC_Alltoall_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete); - if(comminfo->NBC_Dict[NBC_ALLTOALL] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; } - NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_ALLTOALL]); - comminfo->NBC_Dict_size[NBC_ALLTOALL] = 0; - /* initialize the NBC_ALLGATHER SchedCache tree */ - comminfo->NBC_Dict[NBC_ALLGATHER] = hb_tree_new((dict_cmp_func)NBC_Allgather_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete); - if(comminfo->NBC_Dict[NBC_ALLGATHER] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; } - NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_ALLGATHER]); - comminfo->NBC_Dict_size[NBC_ALLGATHER] = 0; - /* initialize the NBC_ALLREDUCE SchedCache tree */ - comminfo->NBC_Dict[NBC_ALLREDUCE] = hb_tree_new((dict_cmp_func)NBC_Allreduce_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete); - if(comminfo->NBC_Dict[NBC_ALLREDUCE] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; } - NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_ALLREDUCE]); - comminfo->NBC_Dict_size[NBC_ALLREDUCE] = 0; - /* initialize the NBC_BARRIER SchedCache tree - is not needed - - * schedule is hung off directly */ - comminfo->NBC_Dict_size[NBC_BARRIER] = 0; - /* initialize the NBC_BCAST SchedCache tree */ - comminfo->NBC_Dict[NBC_BCAST] = hb_tree_new((dict_cmp_func)NBC_Bcast_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete); - if(comminfo->NBC_Dict[NBC_BCAST] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; } - NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_BCAST]); - comminfo->NBC_Dict_size[NBC_BCAST] = 0; - /* initialize the NBC_GATHER SchedCache tree */ - comminfo->NBC_Dict[NBC_GATHER] = hb_tree_new((dict_cmp_func)NBC_Gather_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete); - if(comminfo->NBC_Dict[NBC_GATHER] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; } - NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_GATHER]); - comminfo->NBC_Dict_size[NBC_GATHER] = 0; - /* initialize the NBC_REDUCE SchedCache tree */ - comminfo->NBC_Dict[NBC_REDUCE] = hb_tree_new((dict_cmp_func)NBC_Reduce_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete); - if(comminfo->NBC_Dict[NBC_REDUCE] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; } - NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_REDUCE]); - comminfo->NBC_Dict_size[NBC_REDUCE] = 0; - /* initialize the NBC_SCAN SchedCache tree */ - comminfo->NBC_Dict[NBC_SCAN] = hb_tree_new((dict_cmp_func)NBC_Scan_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete); - if(comminfo->NBC_Dict[NBC_SCAN] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; } - NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_SCAN]); - comminfo->NBC_Dict_size[NBC_SCAN] = 0; - /* initialize the NBC_SCATTER SchedCache tree */ - comminfo->NBC_Dict[NBC_SCATTER] = hb_tree_new((dict_cmp_func)NBC_Scatter_args_compare, NBC_SchedCache_args_delete_key_dummy, NBC_SchedCache_args_delete); - if(comminfo->NBC_Dict[NBC_SCATTER] == NULL) { printf("Error in hb_tree_new()\n"); return NULL; } - NBC_DEBUG(1, "added tree at address %lu\n", (unsigned long)comminfo->NBC_Dict[NBC_SCATTER]); - comminfo->NBC_Dict_size[NBC_SCATTER] = 0; -#endif - -#ifndef OMPI_COMPONENT - /* put the new attribute to the comm */ - res = MPI_Attr_put(comm, gkeyval, comminfo); - if((MPI_SUCCESS != res)) { printf("Error in MPI_Attr_put() (%i)\n", res); return NULL; } -#endif - - return comminfo; -} - -__inline__ int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule) { - int res; - - handle->schedule = schedule; - /* kick off first round */ - res = NBC_Start_round(handle); - if((NBC_OK != res)) { printf("Error in NBC_Start_round() (%i)\n", res); return res; } - - return NBC_OK; -} - -int NBC_Wait_poll(NBC_Handle *handle) { - int res; - - while(NBC_OK != NBC_Progress(handle)); - - res = NBC_Free(handle); - if((NBC_OK != res)) { printf("Error in NBC_Free() (%i)\n", res); return res; } - - return NBC_OK; -} - -int NBC_Wait(NBC_Handle *handle) { - int res; - - NBC_Progress_block(handle); - - res = NBC_Free(handle); - if((NBC_OK != res)) { printf("Error in NBC_Free() (%i)\n", res); return res; } - - return NBC_OK; -} - -#ifdef NBC_CACHE_SCHEDULE -void NBC_SchedCache_args_delete_key_dummy(void *k) { - /* do nothing because the key and the data element are identical :-) - * both (the single one :) is freed in NBC__args_delete() */ -} - - -void NBC_SchedCache_args_delete(void *entry) { - struct NBC_dummyarg *tmp; - - tmp = (struct NBC_dummyarg*)entry; - /* free taglistentry */ - free(*(tmp->schedule)); - /* the schedule pointer itself is also malloc'd */ - free(tmp->schedule); - free(tmp); -} -#endif diff --git a/ompi/mca/coll/libnbc/nbc.h b/ompi/mca/coll/libnbc/nbc.h deleted file mode 100644 index f55ee8df6c..0000000000 --- a/ompi/mca/coll/libnbc/nbc.h +++ /dev/null @@ -1,657 +0,0 @@ -#ifndef __NBC_H__ -#define __NBC_H__ - -/*********************** LibNBC tuning parameters ************************/ - -/* the debug level */ -#define DLEVEL 0 - -/* use MPI, InfiniBand (define IB), or Open MPI (OMPI_COMPONENT) backend */ -#define OMPI_COMPONENT - -/* enable schedule caching - undef NBC_CACHE_SCHEDULE to deactivate it */ -#define NBC_CACHE_SCHEDULE -#define SCHED_DICT_UPPER 1024 /* max. number of dict entries */ -#define SCHED_DICT_LOWER 512 /* nuber of dict entries after wipe, if SCHED_DICT_UPPER is reached */ - -/********************* end of LibNBC tuning parameters ************************/ - -#ifndef OMPI_COMPONENT -/* include mpi.h directly in case of OMPI */ -#include -#else -#include "ompi_config.h" -#include "mpi.h" -#include "ompi/communicator/communicator.h" -#include "ompi/constants.h" -#include "ompi/datatype/datatype.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/request/request.h" -#include "ompi/mca/pml/pml.h" -#endif -#include -#include -#include -#include -#include -#include "dict.h" -#ifdef IB -#include "ib.h" -#endif - -/* if we use MPI-1, MPI_IN_PLACE is not defined :-( */ -#ifndef MPI_IN_PLACE -#define MPI_IN_PLACE (void*)1 -#endif - -/* restore inline behavior for non-gcc compilers */ -#ifndef __GNUC__ -#define __inline__ inline -#endif - -/* log(2) */ -#define LOG2 0.69314718055994530941 - -#ifdef __cplusplus -extern "C" { -#endif - -/* Function return codes */ -#define NBC_OK 0 /* everything went fine */ -#define NBC_SUCCESS 0 /* everything went fine (MPI compliant :) */ -#define NBC_OOR 1 /* out of resources */ -#define NBC_BAD_SCHED 2 /* bad schedule */ -#define NBC_CONTINUE 3 /* progress not done */ -#define NBC_DATATYPE_NOT_SUPPORTED 4 /* datatype not supported or not valid */ -#define NBC_OP_NOT_SUPPORTED 5 /* operation not supported or not valid */ -#define NBC_NOT_IMPLEMENTED 6 -#define NBC_INVALID_PARAM 7 /* invalid parameters */ - -/* true/false */ -#define true 1 -#define false 0 - -/* all collectives */ -#define NBC_ALLGATHER 0 -#define NBC_ALLGATHERV 1 -#define NBC_ALLREDUCE 2 -#define NBC_ALLTOALL 3 -#define NBC_ALLTOALLV 4 -#define NBC_ALLTOALLW 5 -#define NBC_BARRIER 6 -#define NBC_BCAST 7 -#define NBC_EXSCAN 8 -#define NBC_GATHER 9 -#define NBC_GATHERV 10 -#define NBC_REDUCE 11 -#define NBC_REDUCESCAT 12 -#define NBC_SCAN 13 -#define NBC_SCATTER 14 -#define NBC_SCATTERV 15 -#define NBC_NUM_COLL 16 - -/* dirty trick to avoid fn call :) */ -#define NBC_Test NBC_Progress - -/* several typedefs for NBC */ - -/* a schedule is basically a pointer to some memory location where the - * schedule array resides */ -typedef void* NBC_Schedule; - -/* the function type enum */ -typedef enum { - SEND, - RECV, - OP, - COPY, - UNPACK -} NBC_Fn_type; - -/* the send argument struct */ -typedef struct { - void *buf; - char tmpbuf; - int count; - MPI_Datatype datatype; - int dest; -} NBC_Args_send; - -/* the receive argument struct */ -typedef struct { - void *buf; - char tmpbuf; - int count; - MPI_Datatype datatype; - int source; -} NBC_Args_recv; - -/* the operation argument struct */ -typedef struct { - void *buf1; - char tmpbuf1; - void *buf2; - char tmpbuf2; - void *buf3; - char tmpbuf3; - int count; - MPI_Op op; - MPI_Datatype datatype; -} NBC_Args_op; - -/* the copy argument struct */ -typedef struct { - void *src; - char tmpsrc; - int srccount; - MPI_Datatype srctype; - void *tgt; - char tmptgt; - int tgtcount; - MPI_Datatype tgttype; -} NBC_Args_copy; - -/* unpack operation arguments */ -typedef struct { - void *inbuf; - char tmpinbuf; - int count; - MPI_Datatype datatype; - void *outbuf; - char tmpoutbuf; -} NBC_Args_unpack; - -/* used to hang off a communicator */ -typedef struct { - MPI_Comm mycomm; /* save the shadow communicator here */ - int tag; -#ifdef NBC_CACHE_SCHEDULE - hb_tree *NBC_Dict[NBC_NUM_COLL]; /* this should be an array */ - int NBC_Dict_size[NBC_NUM_COLL]; -#endif -} NBC_Comminfo; - -/* thread specific data */ -typedef struct { - MPI_Comm comm; - MPI_Comm mycomm; - long row_offset; - int tag; - int req_count; -#ifdef OMPI_COMPONENT - /*ompi_request_t **req_array;*/ - MPI_Request *req_array; -#endif -#ifdef MPI - MPI_Request *req_array; -#endif -#ifdef IB - IB_Request *req_array; -#endif - NBC_Comminfo *comminfo; - NBC_Schedule *schedule; - void *tmpbuf; /* temporary buffer e.g. used for Reduce */ -/* we should make a handle pointer to a state later */ -} NBC_Handle; - -/* internal function prototypes */ -int NBC_Sched_create(NBC_Schedule* schedule); -int NBC_Sched_send(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int dest, NBC_Schedule *schedule); -int NBC_Sched_recv(void* buf, char tmpbuf, int count, MPI_Datatype datatype, int source, NBC_Schedule *schedule); -int NBC_Sched_op(void* buf3, char tmpbuf3, void* buf1, char tmpbuf1, void* buf2, char tmpbuf2, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule); -int NBC_Sched_copy(void *src, char tmpsrc, int srccount, MPI_Datatype srctype, void *tgt, char tmptgt, int tgtcount, MPI_Datatype tgttype, NBC_Schedule *schedule); -int NBC_Sched_unpack(void *inbuf, char tmpinbuf, int count, MPI_Datatype datatype, void *outbuf, char tmpoutbuf, NBC_Schedule *schedule); -int NBC_Sched_barrier(NBC_Schedule *schedule); -int NBC_Sched_commit(NBC_Schedule *schedule); - -#ifdef NBC_CACHE_SCHEDULE -/* this is a dummy structure which is used to get the schedule out of - * the collop sepcific structure. The schedule pointer HAS to be at the - * first position and should NOT BE REORDERED by the compiler (C - * guarantees that */ -struct NBC_dummyarg { - NBC_Schedule *schedule; -}; - -typedef struct { - NBC_Schedule *schedule; - void *sendbuf; - int sendcount; - MPI_Datatype sendtype; - void* recvbuf; - int recvcount; - MPI_Datatype recvtype; -} NBC_Alltoall_args; -int NBC_Alltoall_args_compare(NBC_Alltoall_args *a, NBC_Alltoall_args *b, void *param); - -typedef struct { - NBC_Schedule *schedule; - void *sendbuf; - int sendcount; - MPI_Datatype sendtype; - void* recvbuf; - int recvcount; - MPI_Datatype recvtype; -} NBC_Allgather_args; -int NBC_Allgather_args_compare(NBC_Allgather_args *a, NBC_Allgather_args *b, void *param); - -typedef struct { - NBC_Schedule *schedule; - void *sendbuf; - void* recvbuf; - int count; - MPI_Datatype datatype; - MPI_Op op; -} NBC_Allreduce_args; -int NBC_Allreduce_args_compare(NBC_Allreduce_args *a, NBC_Allreduce_args *b, void *param); - -typedef struct { - NBC_Schedule *schedule; - void *buffer; - int count; - MPI_Datatype datatype; - int root; -} NBC_Bcast_args; -int NBC_Bcast_args_compare(NBC_Bcast_args *a, NBC_Bcast_args *b, void *param); - -typedef struct { - NBC_Schedule *schedule; - void *sendbuf; - int sendcount; - MPI_Datatype sendtype; - void* recvbuf; - int recvcount; - MPI_Datatype recvtype; - int root; -} NBC_Gather_args; -int NBC_Gather_args_compare(NBC_Gather_args *a, NBC_Gather_args *b, void *param); - -typedef struct { - NBC_Schedule *schedule; - void *sendbuf; - void* recvbuf; - int count; - MPI_Datatype datatype; - MPI_Op op; - int root; -} NBC_Reduce_args; -int NBC_Reduce_args_compare(NBC_Reduce_args *a, NBC_Reduce_args *b, void *param); - -typedef struct { - NBC_Schedule *schedule; - void *sendbuf; - void* recvbuf; - int count; - MPI_Datatype datatype; - MPI_Op op; -} NBC_Scan_args; -int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param); - -typedef struct { - NBC_Schedule *schedule; - void *sendbuf; - int sendcount; - MPI_Datatype sendtype; - void* recvbuf; - int recvcount; - MPI_Datatype recvtype; - int root; -} NBC_Scatter_args; -int NBC_Scatter_args_compare(NBC_Scatter_args *a, NBC_Scatter_args *b, void *param); - - -/* Schedule cache structures/functions */ -u_int32_t adler32(u_int32_t adler, int8_t *buf, int len); -void NBC_SchedCache_args_delete(void *entry); -void NBC_SchedCache_args_delete_key_dummy(void *k); - -#endif - - -int NBC_Free(NBC_Handle *handle); -int NBC_Progress(NBC_Handle *handle); -int NBC_Progress_block(NBC_Handle *handle); -__inline__ int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule); -__inline__ int NBC_Init_handle(NBC_Handle *handle, MPI_Comm comm); -int NBC_Operation(void *buf3, void *buf1, void *buf2, MPI_Op op, MPI_Datatype type, int count); -static __inline__ int NBC_Type_intrinsic(MPI_Datatype type); -static __inline__ int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm); -__inline__ NBC_Comminfo* NBC_Init_comm(MPI_Comm comm); - -/* external function prototypes */ -int NBC_Ibarrier(MPI_Comm comm, NBC_Handle* handle); -int NBC_Ibcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, NBC_Handle* handle); -int NBC_Ibcast_lin(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, NBC_Handle* handle); -int NBC_Ialltoallv(void* sendbuf, int *sendcounts, int *sdispls, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *rdispls, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle* handle); -int NBC_Igather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle); -int NBC_Igatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle); -int NBC_Iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle); -int NBC_Iscatterv(void* sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle); -int NBC_Iallgather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle); -int NBC_Iallgatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle); -int NBC_Ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle); -int NBC_Ireduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, NBC_Handle* handle); -int NBC_Iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, NBC_Handle* handle); -int NBC_Iscan(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, NBC_Handle* handle); -int NBC_Ireduce_scatter(void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, NBC_Handle* handle); -int NBC_Wait(NBC_Handle *handle); -int NBC_Wait_poll(NBC_Handle *handle); - -/* some macros */ - -/* a schedule has the following format: - * [schedule] ::= [size][round-schedule][delimiter][round-schedule][delimiter]...[end] - * [size] ::= size of the schedule (int) - * [round-schedule] ::= [num][type][type-args][type][type-args]... - * [num] ::= number of elements in round (int) - * [type] ::= function type (NBC_Fn_type) - * [type-args] ::= type specific arguments (NBC_Args_send, NBC_Args_recv or, NBC_Args_op) - * [delimiter] ::= 1 (char) - indicates that a round follows - * [end] ::= 0 (char) - indicates that this is the last round - */ - -/* NBC_GET_ROUND_SIZE returns the size in bytes of a round of a NBC_Schedule - * schedule. A round has the format: - * [num]{[type][type-args]} - * e.g. [(int)2][(NBC_Fn_type)SEND][(NBC_Args_send)SEND-ARGS][(NBC_Fn_type)RECV][(NBC_Args_recv)RECV-ARGS] */ -#define NBC_GET_ROUND_SIZE(schedule, size) \ - { \ - int *numptr; \ - NBC_Fn_type *typeptr; \ - int i; \ - \ - numptr = (int*)schedule; \ - /*NBC_DEBUG(10, "GET_ROUND_SIZE got %i elements\n", *numptr); */\ - /* end is increased by sizeof(int) bytes to point to type */ \ - typeptr = (NBC_Fn_type*)((int*)(schedule)+1); \ - for (i=0; i<*numptr; i++) { \ - /* go sizeof op-data forward */ \ - switch(*typeptr) { \ - case SEND: \ - /*printf("found a SEND at offset %i\n", (int)typeptr-(int)schedule); */\ - typeptr = (NBC_Fn_type*)((NBC_Args_send*)typeptr+1); \ - break; \ - case RECV: \ - /*printf("found a RECV at offset %i\n", (int)typeptr-(int)schedule); */\ - typeptr = (NBC_Fn_type*)((NBC_Args_recv*)typeptr+1); \ - break; \ - case OP: \ - /*printf("found a OP at offset %i\n", (int)typeptr-(int)schedule); */\ - typeptr = (NBC_Fn_type*)((NBC_Args_op*)typeptr+1); \ - break; \ - case COPY: \ - /*printf("found a COPY at offset %i\n", (int)typeptr-(int)schedule); */\ - typeptr = (NBC_Fn_type*)((NBC_Args_copy*)typeptr+1); \ - break; \ - case UNPACK: \ - /*printf("found a UNPACK at offset %i\n", (int)typeptr-(int)schedule); */\ - typeptr = (NBC_Fn_type*)((NBC_Args_unpack*)typeptr+1); \ - break; \ - default: \ - printf("NBC_GET_ROUND_SIZE: bad type %li at offset %li\n", (long)*typeptr, (long)typeptr-(long)schedule); \ - return NBC_BAD_SCHED; \ - } \ - /* increase ptr by size of fn_type enum */ \ - typeptr = (NBC_Fn_type*)((NBC_Fn_type*)typeptr+1); \ - } \ - /* this could be optimized if typeptr would be used directly */ \ - size = (long)typeptr-(long)schedule; \ - } - -/* returns the size of a schedule in bytes */ -#define NBC_GET_SIZE(schedule, size) \ -{ \ - size=*(int*)schedule; \ -} - -/* increase the size of a schedule by size bytes */ -#define NBC_INC_SIZE(schedule, size) \ -{ \ - *(int*)schedule+=size; \ -} - -/* increments the number of operations in the last round */ -#define NBC_INC_NUM_ROUND(schedule) \ -{ \ - int total_size; \ - long round_size; \ - char *ptr, *lastround; \ - \ - NBC_GET_SIZE(schedule, total_size); \ - \ - /* ptr begins at first round (first int is overall size) */ \ - ptr = (char*)((char*)schedule+sizeof(int)); \ - lastround = ptr; \ - while ((long)ptr-(long)schedule < total_size) { \ - NBC_GET_ROUND_SIZE(ptr, round_size); \ - /*printf("got round size %i\n", round_size);*/ \ - lastround = ptr; \ - /* add round size */ \ - ptr=ptr+round_size; \ - /* add sizeof(char) as barrier delimiter */ \ - ptr=ptr+sizeof(char); \ - /*printf("(int)ptr-(int)schedule=%i, size=%i\n", (int)ptr-(int)schedule, size); */\ - } \ - /*printf("lastround count is at offset: %i\n", (int)lastround-(int)schedule);*/ \ - /* this is the count in the last round of the schedule */ \ - (*(int*)lastround)++; \ -} - -/* NBC_PRINT_ROUND prints a round in a schedule. A round has the format: - * [num]{[op][op-data]} types: [int]{[enum][op-type]} - * e.g. [2][SEND][SEND-ARGS][RECV][RECV-ARGS] */ -#define NBC_PRINT_ROUND(schedule) \ - { \ - int myrank, *numptr; \ - NBC_Fn_type *typeptr; \ - NBC_Args_send *sendargs; \ - NBC_Args_recv *recvargs; \ - NBC_Args_op *opargs; \ - NBC_Args_copy *copyargs; \ - NBC_Args_unpack *unpackargs; \ - int i; \ - \ - numptr = (int*)schedule; \ - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); \ - printf("has %i actions: \n", *numptr); \ - /* end is increased by sizeof(int) bytes to point to type */ \ - typeptr = (NBC_Fn_type*)((int*)(schedule)+1); \ - for (i=0; i<*numptr; i++) { \ - /* go sizeof op-data forward */ \ - switch(*typeptr) { \ - case SEND: \ - printf("[%i] SEND (offset %li) ", myrank, (long)typeptr-(long)schedule); \ - sendargs = (NBC_Args_send*)(typeptr+1); \ - printf("*buf: %lu, count: %i, type: %lu, dest: %i)\n", (unsigned long)sendargs->buf, sendargs->count, (unsigned long)sendargs->datatype, sendargs->dest); \ - typeptr = (NBC_Fn_type*)((NBC_Args_send*)typeptr+1); \ - break; \ - case RECV: \ - printf("[%i] RECV (offset %li) ", myrank, (long)typeptr-(long)schedule); \ - recvargs = (NBC_Args_recv*)(typeptr+1); \ - printf("*buf: %lu, count: %i, type: %lu, source: %i)\n", (unsigned long)recvargs->buf, recvargs->count, (unsigned long)recvargs->datatype, recvargs->source); \ - typeptr = (NBC_Fn_type*)((NBC_Args_recv*)typeptr+1); \ - break; \ - case OP: \ - printf("[%i] OP (offset %li) ", myrank, (long)typeptr-(long)schedule); \ - opargs = (NBC_Args_op*)(typeptr+1); \ - printf("*buf1: %lu, buf2: %lu, count: %i, type: %lu)\n", (unsigned long)opargs->buf1, (unsigned long)opargs->buf2, opargs->count, (unsigned long)opargs->datatype); \ - typeptr = (NBC_Fn_type*)((NBC_Args_op*)typeptr+1); \ - break; \ - case COPY: \ - printf("[%i] COPY (offset %li) ", myrank, (long)typeptr-(long)schedule); \ - copyargs = (NBC_Args_copy*)(typeptr+1); \ - printf("*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu, tgtcount: %i, tgttype: %lu)\n", (unsigned long)copyargs->src, copyargs->srccount, (unsigned long)copyargs->srctype, (unsigned long)copyargs->tgt, copyargs->tgtcount, (unsigned long)copyargs->tgttype); \ - typeptr = (NBC_Fn_type*)((NBC_Args_copy*)typeptr+1); \ - break; \ - case UNPACK: \ - printf("[%i] UNPACK (offset %li) ", myrank, (long)typeptr-(long)schedule); \ - unpackargs = (NBC_Args_unpack*)(typeptr+1); \ - printf("*src: %lu, srccount: %i, srctype: %lu, *tgt: %lu\n",(unsigned long)unpackargs->inbuf, unpackargs->count, (unsigned long)unpackargs->datatype, (unsigned long)unpackargs->outbuf); \ - typeptr = (NBC_Fn_type*)((NBC_Args_unpack*)typeptr+1); \ - break; \ - default: \ - printf("[%i] NBC_PRINT_ROUND: bad type %li at offset %li\n", myrank, (long)*typeptr, (long)typeptr-(long)schedule); \ - return NBC_BAD_SCHED; \ - } \ - /* increase ptr by size of fn_type enum */ \ - typeptr = (NBC_Fn_type*)((NBC_Fn_type*)typeptr+1); \ - } \ - printf("\n"); \ - } - -#define NBC_PRINT_SCHED(schedule) \ -{ \ - int size, myrank; \ - long round_size; \ - char *ptr; \ - \ - NBC_GET_SIZE(schedule, size); \ - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); \ - printf("[%i] printing schedule of size %i\n", myrank, size); \ - \ - /* ptr begins at first round (first int is overall size) */ \ - ptr = (char*)((char*)schedule+sizeof(int)); \ - while ((long)ptr-(long)schedule < size) { \ - NBC_GET_ROUND_SIZE(ptr, round_size); \ - printf("[%i] Round at byte %li (size %li) ", myrank, (long)ptr-(long)schedule, round_size); \ - NBC_PRINT_ROUND(ptr); \ - /* add round size */ \ - ptr=ptr+round_size; \ - /* add sizeof(char) as barrier delimiter */ \ - ptr=ptr+sizeof(char); \ - } \ -} - -#define CHECK_NULL(ptr) \ -{ \ - if(ptr == NULL) { \ - printf("realloc error :-(\n"); \ - } \ -} - - -/* -#define NBC_DEBUG(level, ...) {} -*/ - -static __inline__ void NBC_DEBUG(int level, const char *fmt, ...) -{ - va_list ap; - int rank; - - if(DLEVEL > level) { - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - printf("[%i] ", rank); - va_start(ap, fmt); - vprintf(fmt, ap); - va_end (ap); - } -} - -/* returns true (1) or false (0) if type is intrinsic or not */ -static __inline__ int NBC_Type_intrinsic(MPI_Datatype type) { - - if( ( type == MPI_INT ) || - ( type == MPI_LONG ) || - ( type == MPI_SHORT ) || - ( type == MPI_UNSIGNED ) || - ( type == MPI_UNSIGNED_SHORT ) || - ( type == MPI_UNSIGNED_LONG ) || - ( type == MPI_FLOAT ) || - ( type == MPI_DOUBLE ) || - ( type == MPI_LONG_DOUBLE ) || - ( type == MPI_BYTE ) || - ( type == MPI_FLOAT_INT) || - ( type == MPI_DOUBLE_INT) || - ( type == MPI_LONG_INT) || - ( type == MPI_2INT) || - ( type == MPI_SHORT_INT) || - ( type == MPI_LONG_DOUBLE_INT)) - return 1; - else - return 0; -} - -/* let's give a try to inline functions */ -static __inline__ int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) { - int size, pos, res; - MPI_Aint ext; - void *packbuf; - - if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) { - /* if we have the same types and they are contiguous (intrinsic - * types are contiguous), we can just use a single memcpy */ - res = MPI_Type_extent(srctype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - memcpy(tgt, src, srccount*ext); - } else { - /* we have to pack and unpack */ - res = MPI_Pack_size(srccount, srctype, comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; } - packbuf = malloc(size); - if (NULL == packbuf) { printf("Error in malloc()\n"); return res; } - pos=0; - res = MPI_Pack(src, srccount, srctype, packbuf, size, &pos, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; } - pos=0; - res = MPI_Unpack(packbuf, size, &pos, tgt, tgtcount, tgttype, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Unpack() (%i)\n", res); return res; } - free(packbuf); - } - - return NBC_OK; -} - -static __inline__ int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void *tgt, MPI_Comm comm) { - int size, pos, res; - MPI_Aint ext; - - if(NBC_Type_intrinsic(srctype)) { - /* if we have the same types and they are contiguous (intrinsic - * types are contiguous), we can just use a single memcpy */ - res = MPI_Type_extent(srctype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - memcpy(tgt, src, srccount*ext); - - } else { - /* we have to unpack */ - res = MPI_Pack_size(srccount, srctype, comm, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; } - pos=0; - res = MPI_Unpack(src, size, &pos, tgt, srccount, srctype, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Unpack() (%i)\n", res); return res; } - } - - return NBC_OK; -} - -/* deletes elements from dict until low watermark is reached */ -static __inline__ void NBC_SchedCache_dictwipe(hb_tree *dict, int *size) { - hb_itor *itor; - - itor = hb_itor_new(dict); - for (; hb_itor_valid(itor) && (*size>SCHED_DICT_LOWER); hb_itor_next(itor)) { - hb_tree_remove(dict, hb_itor_key(itor), 0); - *size = *size-1; - } - hb_itor_destroy(itor); -} - -#define NBC_IN_PLACE(sendbuf, recvbuf, inplace) \ -{ \ - inplace = 0; \ - if(sendbuf == MPI_IN_PLACE) { \ - sendbuf = recvbuf; \ - inplace = 1; \ - } \ - if(recvbuf == MPI_IN_PLACE) { \ - recvbuf = sendbuf; \ - inplace = 1; \ - } \ -} - -#ifdef __cplusplus -} -#endif -#endif diff --git a/ompi/mca/coll/libnbc/nbc_iallgather.c b/ompi/mca/coll/libnbc/nbc_iallgather.c deleted file mode 100644 index 7b9d360e14..0000000000 --- a/ompi/mca/coll/libnbc/nbc_iallgather.c +++ /dev/null @@ -1,202 +0,0 @@ -#include "nbc.h" - -#ifdef NBC_CACHE_SCHEDULE -/* tree comparison function for schedule cache */ -int NBC_Allgather_args_compare(NBC_Allgather_args *a, NBC_Allgather_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && - (a->sendcount == b->sendcount) && - (a->sendtype == b->sendtype) && - (a->recvbuf == b->recvbuf) && - (a->recvcount == b->recvcount) && - (a->recvtype == b->recvtype) ) { - return 0; - } - if( a->sendbuf < b->sendbuf ) { - return -1; - } - return +1; -} -#endif - -/* simple linear MPI_Iallgather - * the algorithm uses p-1 rounds - * each node sends the packet it received last round (or has in round 0) to it's right neighbor (modulo p) - * each node receives from it's left (modulo p) neighbor */ -int NBC_Iallgather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle) { - int rank, p, res, r; - MPI_Aint rcvext, sndext; - NBC_Schedule *schedule; - char *rbuf, *sbuf, inplace; -#ifdef NBC_CACHE_SCHEDULE - NBC_Allgather_args *args, *found, search; -#endif - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - handle->tmpbuf = NULL; - - if(!((rank == 0) && inplace)) { - /* copy my data to receive buffer */ - rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext); - res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - -#ifdef NBC_CACHE_SCHEDULE - /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.sendcount=sendcount; - search.sendtype=sendtype; - search.recvbuf=recvbuf; - search.recvcount=recvcount; - search.recvtype=recvtype; - found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_ALLGATHER], &search); - if(found == NULL) { -#endif - schedule = malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - - res = NBC_Sched_create(schedule); - if(NBC_OK != res) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; } - - sbuf = ((char *)recvbuf) + (rank*recvcount*rcvext); - /* do p-1 rounds */ - for(r=0;rsendbuf=sendbuf; - args->sendcount=sendcount; - args->sendtype=sendtype; - args->recvbuf=recvbuf; - args->recvcount=recvcount; - args->recvtype=recvtype; - args->schedule=schedule; - res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_ALLGATHER], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_ALLGATHER] > SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_ALLGATHER], &handle->comminfo->NBC_Dict_size[NBC_ALLGATHER]); - } - } else { - /* found schedule */ - schedule=found->schedule; - } -#endif - - /*NBC_PRINT_SCHED(*schedule);*/ - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } - - return NBC_OK; -} - - -/* this is a new possible dissemination based allgather algorithm - we should - * try it some time (big comm, small data) */ -#if 0 - -static __inline__ void diss_unpack(int rank, int vrank, int round, int p, int *pos, void *tmpbuf, int datasize, int slotsize, void *recvbuf, int sendcount, MPI_Datatype sendtype, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Schedule *schedule) { - int r, res; - char *sbuf, *rbuf; - - sbuf = (char *)tmpbuf + (*pos*datasize); - rbuf = (char *)recvbuf + (vrank*slotsize); - printf("[%i] unpacking tmpbuf pos: %i (%lu) to rbuf elem: %i (%lu) - %i elems, datasize %i\n", rank, *pos, (unsigned long)sbuf, vrank, (unsigned long)rbuf, recvcount, datasize); - res = NBC_Sched_unpack(sbuf, recvcount, recvtype, rbuf, schedule); - if (NBC_OK != res) { printf("Error in NBC_Unpack() (%i)\n", res); } - *pos=*pos+1; - - for(r=0; r<=round; r++) { - if(r != 0) { - diss_unpack(rank, (vrank-(1<<(r-1))+p)%p, r-1, p, pos, tmpbuf, datasize, slotsize, recvbuf, sendcount, sendtype, recvcount, recvtype, comm, schedule); - } - } -} - -static __inline__ int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle) { - int res, r, maxround, size, speer, rpeer, pos, datasize; - char *sbuf, *rbuf; - - res = NBC_OK; - if(p < 2) return res; - - maxround = (int)ceil((log(p)/LOG2)); - - if(NBC_Type_intrinsic(sendtype)) { - datasize = sndext*sendcount; - } else { - res = MPI_Pack_size(sendcount, sendtype, comm, &datasize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; } - } - - /* tmpbuf is probably bigger than p -> next power of 2 */ - handle->tmpbuf=malloc(datasize*(1<tmpbuf, sbuf, datasize); - } else { - pos = 0; - res = MPI_Pack(sbuf, sendcount, sendtype, handle->tmpbuf, datasize, &pos, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; } - } - - printf("[%i] receive buffer is at %lu of size %i, maxround: %i\n", rank, (unsigned long)handle->tmpbuf, (int)sndext*sendcount*(1<tmpbuf+size; - sbuf = (char*)handle->tmpbuf; - - speer = (rank + (1<tmpbuf, datasize, recvcount*rcvext, recvbuf, sendcount, sendtype, recvcount, recvtype, comm, schedule); - - return NBC_OK; -} -#endif diff --git a/ompi/mca/coll/libnbc/nbc_iallgatherv.c b/ompi/mca/coll/libnbc/nbc_iallgatherv.c deleted file mode 100644 index 12ba027d84..0000000000 --- a/ompi/mca/coll/libnbc/nbc_iallgatherv.c +++ /dev/null @@ -1,70 +0,0 @@ -#include "nbc.h" - -/* an allgatherv schedule can not be cached easily because the contents - * ot the recvcounts array may change, so a comparison of the address - * would not be sufficient ... we simply do not cache it */ - -/* simple linear MPI_Iallgatherv - * the algorithm uses p-1 rounds - * first round: - * each node sends to it's left node (rank+1)%p sendcount elements - * each node begins with it's right node (rank-11)%p and receives from it recvcounts[(rank+1)%p] elements - * second round: - * each node sends to node (rank+2)%p sendcount elements - * each node receives from node (rank-2)%p recvcounts[(rank+2)%p] elements */ -int NBC_Iallgatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle) { - int rank, p, res, r, speer, rpeer; - MPI_Aint rcvext, sndext; - NBC_Schedule *schedule; - char *rbuf, *sbuf, inplace; - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - schedule = malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; } - - if(!((rank == 0) && inplace)) { - /* copy my data to receive buffer */ - rbuf = ((char *)recvbuf) + (displs[rank]*rcvext); - NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcounts[rank], recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - - sbuf = ((char *)recvbuf) + (displs[rank]*rcvext); - /* do p-1 rounds */ - for(r=1;rsendbuf == b->sendbuf) && - (a->recvbuf == b->recvbuf) && - (a->count == b->count) && - (a->datatype == b->datatype) && - (a->op == b->op) ) { - return 0; - } - if( a->sendbuf < b->sendbuf ) { - return -1; - } - return +1; -} -#endif - -int NBC_Iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, NBC_Handle* handle) { - int rank, p, res, size, segsize; - MPI_Aint ext; - NBC_Schedule *schedule; -#ifdef NBC_CACHE_SCHEDULE - NBC_Allreduce_args *args, *found, search; -#endif - enum { NBC_ARED_BINOMIAL, NBC_ARED_CHAIN } alg; - char inplace; - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - handle->tmpbuf = malloc(ext*count); - if(handle->tmpbuf == NULL) { printf("Error in malloc() (%i)\n", res); return NBC_OOR; } - - if((p == 1) && !inplace) { - /* for a single node - copy data to receivebuf */ - res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - - /* algorithm selection */ - if(p < 4 || size*count < 65536) { - alg = NBC_ARED_BINOMIAL; - } else if(size*count < 262144){ - alg = NBC_ARED_CHAIN; - segsize = 16384/2; - } else { - alg = NBC_ARED_CHAIN; - segsize = 65536/2; - } - -#ifdef NBC_CACHE_SCHEDULE - /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.recvbuf=recvbuf; - search.count=count; - search.datatype=datatype; - search.op=op; - found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_ALLREDUCE], &search); - if(found == NULL) { -#endif - schedule = malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - switch(alg) { - case NBC_ARED_BINOMIAL: - res = allred_sched_diss(rank, p, count, datatype, sendbuf, recvbuf, op, schedule, handle); - break; - case NBC_ARED_CHAIN: - res = allred_sched_chain(rank, p, count, datatype, sendbuf, recvbuf, op, size, ext, schedule, handle, segsize); - break; - } - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } - - res = NBC_Sched_commit(schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } - -#ifdef NBC_CACHE_SCHEDULE - /* save schedule to tree */ - args = malloc(sizeof(NBC_Allreduce_args)); - args->sendbuf=sendbuf; - args->recvbuf=recvbuf; - args->count=count; - args->datatype=datatype; - args->op=op; - args->schedule=schedule; - res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_ALLREDUCE], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_ALLREDUCE] > SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_ALLREDUCE], &handle->comminfo->NBC_Dict_size[NBC_ALLREDUCE]); - } - } else { - /* found schedule */ - schedule=found->schedule; - } -#endif - - res = NBC_Start(handle, schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } - - /* tmpbuf is freed with the handle */ - return NBC_OK; -} - - -/* binomial allreduce (binomial tree up and binomial bcast down) - * working principle: - * - each node gets a virtual rank vrank - * - the 'root' node get vrank 0 - * - node 0 gets the vrank of the 'root' - * - all other ranks stay identical (they do not matter) - * - * Algorithm: - * pairwise exchange - * round r: - * grp = rank % 2^r - * if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value - * if grp == 1: send to rank - 2^(r-1) and exit function - * - * do this for R=log_2(p) rounds - * followed by a Bcast: - * Algorithm: - * - each node with vrank > 2^r and vrank < 2^r+1 receives from node - * vrank - 2^r (vrank=1 receives from 0, vrank 0 receives never) - * - each node sends each round r to node vrank + 2^r - * - a node stops to send if 2^r > commsize - * - */ -#define RANK2VRANK(rank, vrank, root) \ -{ \ - vrank = rank; \ - if (rank == 0) vrank = root; \ - if (rank == root) vrank = 0; \ -} -#define VRANK2RANK(rank, vrank, root) \ -{ \ - rank = vrank; \ - if (vrank == 0) rank = root; \ - if (vrank == root) rank = 0; \ -} -static __inline__ int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle) { - int root, vrank, r, maxr, firstred, vpeer, peer, res; - - root = 0; /* this makes the code for ireduce and iallreduce nearly identical - could be changed to improve performance */ - RANK2VRANK(rank, vrank, root); - maxr = (int)ceil((log(p)/LOG2)); - - firstred = 1; - for(r=1; r<=maxr; r++) { - if((vrank % (1<tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - if(firstred) { - /* perform the reduce with the senbuf */ - res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule); - firstred = 0; - } else { - /* perform the reduce in my local buffer */ - res = NBC_Sched_op(recvbuf, false, recvbuf, false, 0, true, count, datatype, op, schedule); - } - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - } - } else { - /* we have to send this round */ - vpeer = vrank - (1<<(r-1)); - VRANK2RANK(peer, vpeer, root) - if(firstred) { - /* we have to use the sendbuf in the first round .. */ - res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule); - } else { - /* and the recvbuf in all remeining rounds */ - res = NBC_Sched_send(recvbuf, false, count, datatype, peer, schedule); - } - if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - /* leave the game */ - break; - } - } - - /* this is the Bcast part - copied with minor changes from nbc_ibcast.c - * changed: buffer -> recvbuf */ - RANK2VRANK(rank, vrank, root); - - /* receive from the right hosts */ - if(vrank != 0) { - for(r=0; r= (1<tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - } - res = NBC_Sched_barrier(schedule); - if(NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - } - - /* now send to the right hosts */ - for(r=0; rtmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - } - /* end of the bcast */ - - return NBC_OK; -} - -static __inline__ int allred_sched_chain(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize) { - int res, rrpeer, rbpeer, srpeer, sbpeer, numfrag, fragnum, fragcount, thiscount, bstart, bend; - long roffset, boffset; - - /* reduce peers */ - rrpeer = rank+1; - srpeer = rank-1; - /* bcast peers */ - rbpeer = rank-1; - sbpeer = rank+1; - - if(count == 0) return NBC_OK; - - numfrag = count*size/fragsize; - if((count*size)%fragsize != 0) numfrag++; - fragcount = count/numfrag; - - /* determine the starting round of bcast ... the first reduced packet - * is after p-1 rounds at rank 0 and will be sent back ... */ - bstart = p-1+rank; - /* determine the ending round of bcast ... after arrival of the first - * packet, each rank has to forward numfrag packets */ - bend = bstart+numfrag; - //printf("[%i] numfrag: %i, count: %i, size: %i, fragcount: %i, bstart: %i, bend: %i\n", rank, numfrag, count, size, fragcount, bstart, bend); - - /* this are two loops in one - this is a little nasty :-( */ - for(fragnum = 0; fragnum < bend; fragnum++) { - roffset = fragnum*fragcount*ext; - boffset = (fragnum-bstart)*fragcount*ext; - thiscount = fragcount; - - /* first numfrag rounds ... REDUCE to rank 0 */ - if(fragnum < numfrag) { - if(fragnum == numfrag-1) { - /* last fragment may not be full */ - thiscount = count-fragcount*fragnum; - } - //printf("[%i] reduce %i elements from %lu\n", rank, thiscount, roffset); - - /* REDUCE - PART last node does not recv */ - if(rank != p-1) { - res = NBC_Sched_recv((char*)roffset, true, thiscount, datatype, rrpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - res = NBC_Sched_barrier(schedule); - /* root reduces into receivebuf */ - if(rank == 0) { - res = NBC_Sched_op((char*)recvbuf+roffset, false, (char*)sendbuf+roffset, false, (char*)roffset, true, thiscount, datatype, op, schedule); - } else { - res = NBC_Sched_op((char*)roffset, true, (char*)sendbuf+roffset, false, (char*)roffset, true, thiscount, datatype, op, schedule); - } - res = NBC_Sched_barrier(schedule); - } - - /* REDUCE PART root does not send */ - if(rank != 0) { - /* rank p-1 has to send out of sendbuffer :) */ - if(rank == p-1) { - res = NBC_Sched_send((char*)sendbuf+roffset, false, thiscount, datatype, srpeer, schedule); - } else { - res = NBC_Sched_send((char*)roffset, true, thiscount, datatype, srpeer, schedule); - } - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - /* this barrier here seems awkward but isn't!!!! */ - //res = NBC_Sched_barrier(schedule); - } - } - - /* BCAST from rank 0 */ - if(fragnum >= bstart) { - //printf("[%i] bcast %i elements from %lu\n", rank, thiscount, boffset); - if(fragnum == bend-1) { - /* last fragment may not be full */ - thiscount = count-fragcount*(fragnum-bstart); - } - - /* BCAST PART root does not receive */ - if(rank != 0) { - res = NBC_Sched_recv((char*)recvbuf+boffset, false, thiscount, datatype, rbpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - res = NBC_Sched_barrier(schedule); - } - - /* BCAST PART last rank does not send */ - if(rank != p-1) { - res = NBC_Sched_send((char*)recvbuf+boffset, false, thiscount, datatype, sbpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - res = NBC_Sched_barrier(schedule); - } - } - } - - //NBC_PRINT_SCHED(*schedule); - - return NBC_OK; -} diff --git a/ompi/mca/coll/libnbc/nbc_ialltoall.c b/ompi/mca/coll/libnbc/nbc_ialltoall.c deleted file mode 100644 index 8ab9c40bd9..0000000000 --- a/ompi/mca/coll/libnbc/nbc_ialltoall.c +++ /dev/null @@ -1,300 +0,0 @@ -#include "nbc.h" - -static __inline__ int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -static __inline__ int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule *schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -static __inline__ int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle); - -#ifdef NBC_CACHE_SCHEDULE -/* tree comparison function for schedule cache */ -int NBC_Alltoall_args_compare(NBC_Alltoall_args *a, NBC_Alltoall_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && - (a->sendcount == b->sendcount) && - (a->sendtype == b->sendtype) && - (a->recvbuf == b->recvbuf) && - (a->recvcount == b->recvcount) && - (a->recvtype == b->recvtype) ) { - return 0; - } - if( a->sendbuf < b->sendbuf ) { - return -1; - } - return +1; -} -#endif - -/* simple linear MPI_Ialltoall the (simple) algorithm just sends to all nodes */ -int NBC_Ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle) { - int rank, p, res, a2asize, sndsize, datasize; - NBC_Schedule *schedule; - MPI_Aint rcvext, sndext; -#ifdef NBC_CACHE_SCHEDULE - NBC_Alltoall_args *args, *found, search; -#endif - char *rbuf, *sbuf, inplace; - enum {NBC_A2A_LINEAR, NBC_A2A_PAIRWISE, NBC_A2A_DISS} alg; - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_size(sendtype, &sndsize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } - - /* algorithm selection */ - a2asize = sndsize*sendcount*p; - /* this number is optimized for TCP on odin.cs.indiana.edu */ - if((p <= 8) && ((a2asize < 1<<17) || (sndsize*sendcount < 1<<12))) { - /* just send as fast as we can if we have less than 8 peers, if the - * total communicated size is smaller than 1<<17 *and* if we don't - * have eager messages (msgsize < 1<<13) */ - alg = NBC_A2A_LINEAR; - } else if(a2asize < (1<<12)*p) { - alg = NBC_A2A_DISS; - } else - alg = NBC_A2A_LINEAR; /*NBC_A2A_PAIRWISE*/; - - if(!inplace) { - /* copy my data to receive buffer */ - rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext); - sbuf = ((char *)sendbuf) + (rank*sendcount*sndext); - res = NBC_Copy(sbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - - /* allocate temp buffer if we need one */ - if(alg == NBC_A2A_DISS) { - /* only A2A_DISS needs buffers */ - if(NBC_Type_intrinsic(sendtype)) { - datasize = sndext*sendcount; - } else { - res = MPI_Pack_size(sendcount, sendtype, comm, &datasize); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; } - } - /* allocate temporary buffers */ - if(p % 2 == 0) { - handle->tmpbuf=malloc(datasize*p*2); - } else { - /* we cannot divide p by two, so alloc more to be safe ... */ - handle->tmpbuf=malloc(datasize*(p/2+1)*2*2); - } - - /* phase 1 - rotate n data blocks upwards into the tmpbuffer */ - if(NBC_Type_intrinsic(sendtype)) { - /* contiguous - just copy (1st copy) */ - memcpy(handle->tmpbuf, (char*)sendbuf+datasize*rank, datasize*(p-rank)); - if(rank != 0) memcpy((char*)handle->tmpbuf+datasize*(p-rank), sendbuf, datasize*(rank)); - } else { - int pos=0; - - /* non-contiguous - pack */ - res = MPI_Pack((char*)sendbuf+rank*sendcount*sndext, (p-rank)*sendcount, sendtype, handle->tmpbuf, (p-rank)*datasize, &pos, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; } - if(rank != 0) { - pos = 0; - MPI_Pack(sendbuf, rank*sendcount, sendtype, (char*)handle->tmpbuf+datasize*(p-rank), rank*datasize, &pos, comm); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; } - } - } - } else { - handle->tmpbuf=NULL; - } - -#ifdef NBC_CACHE_SCHEDULE - /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.sendcount=sendcount; - search.sendtype=sendtype; - search.recvbuf=recvbuf; - search.recvcount=recvcount; - search.recvtype=recvtype; - found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_ALLTOALL], &search); - if(found == NULL) { -#endif - /* not found - generate new schedule */ - schedule = malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - switch(alg) { - case NBC_A2A_LINEAR: - res = a2a_sched_linear(rank, p, sndext, rcvext, schedule, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); - break; - case NBC_A2A_DISS: - res = a2a_sched_diss(rank, p, sndext, rcvext, schedule, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, handle); - break; - case NBC_A2A_PAIRWISE: - res = a2a_sched_pairwise(rank, p, sndext, rcvext, schedule, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); - break; - } - - if (NBC_OK != res) { return res; } - - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } - -#ifdef NBC_CACHE_SCHEDULE - /* save schedule to tree */ - args = malloc(sizeof(NBC_Alltoall_args)); - args->sendbuf=sendbuf; - args->sendcount=sendcount; - args->sendtype=sendtype; - args->recvbuf=recvbuf; - args->recvcount=recvcount; - args->recvtype=recvtype; - args->schedule=schedule; - res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_ALLTOALL], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_ALLTOALL] > SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_ALLTOALL], &handle->comminfo->NBC_Dict_size[NBC_ALLTOALL]); - /*if(!rank) printf("[%i] removing %i elements - new size: %i \n", rank, SCHED_DICT_UPPER-SCHED_DICT_LOWER, handle->comminfo->NBC_Alltoall_size);*/ - } - /*if(!rank) printf("[%i] added new schedule to tree - number %i\n", rank, handle->comminfo->NBC_Dict_size[NBC_ALLTOALL]);*/ - } else { - /* found schedule */ - schedule=found->schedule; - } -#endif - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } - - return NBC_OK; -} - -static __inline__ int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) { - int res, r, sndpeer, rcvpeer; - char *rbuf, *sbuf; - - res = NBC_OK; - if(p < 2) return res; - - for(r=1;rtmpbuf+datasize*p; - stmpbuf = (char*)handle->tmpbuf+datasize*(p+p/2); - } else { - /* we cannot divide p by two, so alloc more to be safe ... */ - virtp = (p/2+1)*2; - rtmpbuf = (char*)handle->tmpbuf+datasize*p; - stmpbuf = (char*)handle->tmpbuf+datasize*(p+virtp/2); - } - - /* phase 2 - communicate */ - /*printf("[%i] temp buffer is at %lu of size %i, maxround: %i\n", rank, (unsigned long)handle->tmpbuf, (int)datasize*p*(1<tmpbuf, true, datasize, MPI_BYTE, schedule); - offset += datasize; - } - } - - speer = ( rank + r) % p; - /* add p because modulo does not work with negative values */ - rpeer = ((rank - r)+p) % p; - - /*printf("[%i] receiving %i bytes from host %i into rbuf %lu\n", rank, offset, rpeer, (unsigned long)rtmpbuf);*/ - res = NBC_Sched_recv(rtmpbuf-(unsigned long)handle->tmpbuf, true, offset, MPI_BYTE, rpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - - /*printf("[%i] sending %i bytes to host %i from sbuf %lu\n", rank, offset, speer, (unsigned long)stmpbuf);*/ - res = NBC_Sched_send(stmpbuf-(unsigned long)handle->tmpbuf, true, offset, MPI_BYTE, speer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - /* unpack from buffer */ - offset = 0; - for(i=1; itmpbuf, true, datasize, MPI_BYTE, (void*)(long)(i*datasize), true, datasize, MPI_BYTE, schedule); - offset += datasize; - } - } - } - - /* phase 3 - reorder - data is now in wrong order in handle->tmpbuf - - * reorder it into recvbuf */ - for(i=0; itmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - /* copy data to receivbuffer */ - if((sendcounts[rank] != 0) && !inplace) { - rbuf = ((char *) recvbuf) + (rdispls[rank] * rcvext); - sbuf = ((char *) sendbuf) + (sdispls[rank] * sndext); - res = NBC_Copy(sbuf, sendcounts[rank], sendtype, rbuf, recvcounts[rank], recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - - for (i = 0; i < p; i++) { - if (i == rank) { continue; } - /* post all sends */ - sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); - res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - /* post all receives */ - rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); - res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - - /*NBC_PRINT_SCHED(*schedule);*/ - - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } - - return NBC_OK; -} diff --git a/ompi/mca/coll/libnbc/nbc_ibarrier.c b/ompi/mca/coll/libnbc/nbc_ibarrier.c deleted file mode 100644 index 5d3dd7b5ee..0000000000 --- a/ompi/mca/coll/libnbc/nbc_ibarrier.c +++ /dev/null @@ -1,75 +0,0 @@ -#include "nbc.h" - -/* Dissemination implementation of MPI_Ibarrier */ -int NBC_Ibarrier(MPI_Comm comm, NBC_Handle* handle) { - int round, rank, p, maxround, res, recvpeer, sendpeer; - NBC_Schedule *schedule; - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - - handle->tmpbuf=NULL; - -#ifdef NBC_CACHE_SCHEDULE - /* there only one argument set per communicator -> hang it directly at - * the tree-position, NBC_Dict_size[...] is 0 for not initialized and - * 1 for initialized. NBC_Dict[...] is a pointer to the schedule in - * this case */ - if(handle->comminfo->NBC_Dict_size[NBC_BARRIER] == 0) { - /* we did not init it yet */ -#endif - schedule = malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - - round = -1; - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - maxround = (int)ceil((log(p)/LOG2)-1); - - do { - round++; - sendpeer = (rank + (1<comminfo->NBC_Dict[NBC_BARRIER] = (hb_tree*)schedule; - handle->comminfo->NBC_Dict_size[NBC_BARRIER] = 1; - } else { - /* we found it */ - schedule = (NBC_Schedule*)handle->comminfo->NBC_Dict[NBC_BARRIER]; - } -#endif - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } - - return NBC_OK; -} - -/*void NBC_IBARRIER(MPI_Fint *comm, MPI_Fint *ierr) { - *ierr = NBC_Ibarrier(MPI_Comm comm, NBC_Handle* handle); -}*/ diff --git a/ompi/mca/coll/libnbc/nbc_ibcast.c b/ompi/mca/coll/libnbc/nbc_ibcast.c deleted file mode 100644 index 7398e4d0bf..0000000000 --- a/ompi/mca/coll/libnbc/nbc_ibcast.c +++ /dev/null @@ -1,234 +0,0 @@ -#include "nbc.h" - -static __inline__ int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype); -static __inline__ int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype); -static __inline__ int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype, int fragsize, int size); - -#ifdef NBC_CACHE_SCHEDULE -/* tree comparison function for schedule cache */ -int NBC_Bcast_args_compare(NBC_Bcast_args *a, NBC_Bcast_args *b, void *param) { - - if( (a->buffer == b->buffer) && - (a->count == b->count) && - (a->datatype == b->datatype) && - (a->root == b->root) ) { - return 0; - } - if( a->buffer < b->buffer ) { - return -1; - } - return +1; -} -#endif - -int NBC_Ibcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, NBC_Handle* handle) { - int rank, p, res, size, segsize; - NBC_Schedule *schedule; -#ifdef NBC_CACHE_SCHEDULE - NBC_Bcast_args *args, *found, search; -#endif - enum { NBC_BCAST_LINEAR, NBC_BCAST_BINOMIAL, NBC_BCAST_CHAIN } alg; - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } - - /* algorithm selection */ - if(p <= 4) { - alg = NBC_BCAST_LINEAR; - } else if(size*count < 65536) { - alg = NBC_BCAST_BINOMIAL; - } else if(size*count < 524288) { - alg = NBC_BCAST_CHAIN; - segsize = 16384/2; - } else { - alg = NBC_BCAST_CHAIN; - segsize = 65536/2; - } - - handle->tmpbuf=NULL; - -#ifdef NBC_CACHE_SCHEDULE - /* search schedule in communicator specific tree */ - search.buffer=buffer; - search.count=count; - search.datatype=datatype; - search.root=root; - found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_BCAST], &search); - if(found == NULL) { -#endif - schedule = malloc(sizeof(NBC_Schedule)); - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create, res = %i\n", res); return res; } - - switch(alg) { - case NBC_BCAST_LINEAR: - res = bcast_sched_linear(rank, p, root, schedule, buffer, count, datatype); - break; - case NBC_BCAST_BINOMIAL: - res = bcast_sched_binomial(rank, p, root, schedule, buffer, count, datatype); - break; - case NBC_BCAST_CHAIN: - res = bcast_sched_chain(rank, p, root, schedule, buffer, count, datatype, segsize, size); - break; - } - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } - - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } -#ifdef NBC_CACHE_SCHEDULE - /* save schedule to tree */ - args = malloc(sizeof(NBC_Bcast_args)); - args->buffer=buffer; - args->count=count; - args->datatype=datatype; - args->root=root; - args->schedule=schedule; - res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_BCAST], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_BCAST] > SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_BCAST], &handle->comminfo->NBC_Dict_size[NBC_BCAST]); - } - } else { - /* found schedule */ - schedule=found->schedule; - } -#endif - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } - - return NBC_OK; -} - -/* better binomial bcast - * working principle: - * - each node gets a virtual rank vrank - * - the 'root' node get vrank 0 - * - node 0 gets the vrank of the 'root' - * - all other ranks stay identical (they do not matter) - * - * Algorithm: - * - each node with vrank > 2^r and vrank < 2^r+1 receives from node - * vrank - 2^r (vrank=1 receives from 0, vrank 0 receives never) - * - each node sends each round r to node vrank + 2^r - * - a node stops to send if 2^r > commsize - */ -#define RANK2VRANK(rank, vrank, root) \ -{ \ - vrank = rank; \ - if (rank == 0) vrank = root; \ - if (rank == root) vrank = 0; \ -} -#define VRANK2RANK(rank, vrank, root) \ -{ \ - rank = vrank; \ - if (vrank == 0) rank = root; \ - if (vrank == root) rank = 0; \ -} -static __inline__ int bcast_sched_binomial(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count, MPI_Datatype datatype) { - int maxr, vrank, peer, r, res; - - maxr = (int)ceil((log(p)/LOG2)); - - RANK2VRANK(rank, vrank, root); - - /* receive from the right hosts */ - if(vrank != 0) { - for(r=0; r= (1<sendbuf == b->sendbuf) && - (a->sendcount == b->sendcount) && - (a->sendtype == b->sendtype) && - (a->recvbuf == b->recvbuf) && - (a->recvcount == b->recvcount) && - (a->recvtype == b->recvtype) && - (a->root == b->root) ) { - return 0; - } - if( a->sendbuf < b->sendbuf ) { - return -1; - } - return +1; -} -#endif - -/* simple linear MPI_Igather */ -int NBC_Igather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle) { - int rank, p, res, i; - MPI_Aint rcvext; - NBC_Schedule *schedule; - char *rbuf, inplace; -#ifdef NBC_CACHE_SCHEDULE - NBC_Gather_args *args, *found, search; -#endif - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - handle->tmpbuf = NULL; - - if((rank == root) && (!inplace)) { - rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext); - /* if I am the root - just copy the message (only without MPI_IN_PLACE) */ - res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - -#ifdef NBC_CACHE_SCHEDULE - /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.sendcount=sendcount; - search.sendtype=sendtype; - search.recvbuf=recvbuf; - search.recvcount=recvcount; - search.recvtype=recvtype; - search.root=root; - found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_GATHER], &search); - if(found == NULL) { -#endif - schedule = malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - /* send to root */ - if(rank != root) { - /* send msg to root */ - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } else { - for(i=0;isendbuf=sendbuf; - args->sendcount=sendcount; - args->sendtype=sendtype; - args->recvbuf=recvbuf; - args->recvcount=recvcount; - args->recvtype=recvtype; - args->root=root; - args->schedule=schedule; - res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_GATHER], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_GATHER] > SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_GATHER], &handle->comminfo->NBC_Dict_size[NBC_GATHER]); - } - } else { - /* found schedule */ - schedule=found->schedule; - } -#endif - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } - - return NBC_OK; -} diff --git a/ompi/mca/coll/libnbc/nbc_igatherv.c b/ompi/mca/coll/libnbc/nbc_igatherv.c deleted file mode 100644 index d42a5419ad..0000000000 --- a/ompi/mca/coll/libnbc/nbc_igatherv.c +++ /dev/null @@ -1,62 +0,0 @@ -#include "nbc.h" - -/* an gatherv schedule can not be cached easily because the contents - * ot the recvcounts array may change, so a comparison of the address - * would not be sufficient ... we simply do not cache it */ - -/* simple linear MPI_Igatherv */ -int NBC_Igatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle) { - int rank, p, res, i; - MPI_Aint rcvext; - NBC_Schedule *schedule; - char *rbuf, inplace; - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Type_extent(recvtype, &rcvext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - schedule = malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - /* send to root */ - if(rank != root) { - /* send msg to root */ - res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } else { - for(i=0;isendbuf == b->sendbuf) && - (a->recvbuf == b->recvbuf) && - (a->count == b->count) && - (a->datatype == b->datatype) && - (a->op == b->op) && - (a->root == b->root) ) { - return 0; - } - if( a->sendbuf < b->sendbuf ) { - return -1; - } - return +1; -} -#endif - -/* the non-blocking reduce */ -int NBC_Ireduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, NBC_Handle* handle) { - int rank, p, res, segsize, size; - MPI_Aint ext; - NBC_Schedule *schedule; - char *redbuf=NULL, inplace; -#ifdef NBC_CACHE_SCHEDULE - NBC_Reduce_args *args, *found, search; -#endif - enum { NBC_RED_BINOMIAL, NBC_RED_CHAIN } alg; - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - /* only one node -> copy data */ - if((p == 1) && !inplace) { - res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - - /* algorithm selection */ - if(p < 4 || size*count < 65536) { - alg = NBC_RED_BINOMIAL; - if(rank == root) { - /* root reduces in receivebuffer */ - handle->tmpbuf = malloc(ext*count); - } else { - /* recvbuf may not be valid on non-root nodes */ - handle->tmpbuf = malloc(ext*count*2); - redbuf = ((char*)handle->tmpbuf)+(ext*count); - } - } else { - handle->tmpbuf = malloc(ext*count); - alg = NBC_RED_CHAIN; - segsize = 16384/2; - } - if (NULL == handle->tmpbuf) { printf("Error in malloc() (%i)\n", res); return res; } - -#ifdef NBC_CACHE_SCHEDULE - /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.recvbuf=recvbuf; - search.count=count; - search.datatype=datatype; - search.op=op; - search.root=root; - found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_REDUCE], &search); - if(found == NULL) { -#endif - schedule = malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc() (%i)\n", res); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - switch(alg) { - case NBC_RED_BINOMIAL: - res = red_sched_binomial(rank, p, root, sendbuf, recvbuf, count, datatype, op, redbuf, schedule, handle); - break; - case NBC_RED_CHAIN: - res = red_sched_chain(rank, p, root, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, handle, segsize); - break; - } - if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; } - - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } -#ifdef NBC_CACHE_SCHEDULE - /* save schedule to tree */ - args = malloc(sizeof(NBC_Alltoall_args)); - args->sendbuf=sendbuf; - args->recvbuf=recvbuf; - args->count=count; - args->datatype=datatype; - args->op=op; - args->root=root; - args->schedule=schedule; - res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_REDUCE], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for Reduce */ - if(++handle->comminfo->NBC_Dict_size[NBC_REDUCE] > SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_REDUCE], &handle->comminfo->NBC_Dict_size[NBC_REDUCE]); - } - } else { - /* found schedule */ - schedule=found->schedule; - } -#endif - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } - - /* tmpbuf is freed with the handle */ - return NBC_OK; -} - - -/* binomial reduce - * working principle: - * - each node gets a virtual rank vrank - * - the 'root' node get vrank 0 - * - node 0 gets the vrank of the 'root' - * - all other ranks stay identical (they do not matter) - * - * Algorithm: - * pairwise exchange - * round r: - * grp = rank % 2^r - * if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value - * if grp == 1: send to rank - 2^(r-1) and exit function - * - * do this for R=log_2(p) rounds - * - */ -#define RANK2VRANK(rank, vrank, root) \ -{ \ - vrank = rank; \ - if (rank == 0) vrank = root; \ - if (rank == root) vrank = 0; \ -} -#define VRANK2RANK(rank, vrank, root) \ -{ \ - rank = vrank; \ - if (vrank == 0) rank = root; \ - if (vrank == root) rank = 0; \ -} -static __inline__ int red_sched_binomial(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, char *redbuf, NBC_Schedule *schedule, NBC_Handle *handle) { - int firstred, vrank, vpeer, peer, res, maxr, r; - - RANK2VRANK(rank, vrank, root); - maxr = (int)ceil((log(p)/LOG2)); - - firstred = 1; - for(r=1; r<=maxr; r++) { - if((vrank % (1<tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - /* perform the reduce in my local buffer */ - if(firstred) { - if(rank == root) { - /* root is the only one who reduces in the receivebuffer - * take data from sendbuf in first round - save copy */ - res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule); - } else { - /* all others may not have a receive buffer - * take data from sendbuf in first round - save copy */ - res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, sendbuf, false, 0, true, count, datatype, op, schedule); - } - firstred = 0; - } else { - if(rank == root) { - /* root is the only one who reduces in the receivebuffer */ - res = NBC_Sched_op(recvbuf, false, recvbuf, false, 0, true, count, datatype, op, schedule); - } else { - /* all others may not have a receive buffer */ - res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, redbuf-(unsigned long)handle->tmpbuf, true, 0, true, count, datatype, op, schedule); - } - } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - } - } else { - /* we have to send this round */ - vpeer = vrank - (1<<(r-1)); - VRANK2RANK(peer, vpeer, root) - if(firstred) { - /* we did not reduce anything */ - res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule); - } else { - /* we have to use the redbuf the root (which works in receivebuf) is never sending .. */ - res = NBC_Sched_send(redbuf-(unsigned long)handle->tmpbuf, true, count, datatype, peer, schedule); - } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - /* leave the game */ - break; - } - } - - return NBC_OK; -} - -/* chain send ... */ -static __inline__ int red_sched_chain(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize) { - int res, vrank, rpeer, speer, numfrag, fragnum, fragcount, thiscount; - long offset; - - RANK2VRANK(rank, vrank, root); - VRANK2RANK(rpeer, vrank+1, root); - VRANK2RANK(speer, vrank-1, root); - - if(count == 0) return NBC_OK; - - numfrag = count*size/fragsize; - if((count*size)%fragsize != 0) numfrag++; - fragcount = count/numfrag; - /*printf("numfrag: %i, count: %i, size: %i, fragcount: %i\n", numfrag, count, size, fragcount);*/ - - for(fragnum = 0; fragnum < numfrag; fragnum++) { - offset = fragnum*fragcount*ext; - thiscount = fragcount; - if(fragnum == numfrag-1) { - /* last fragment may not be full */ - thiscount = count-fragcount*fragnum; - } - - /* last node does not recv */ - if(vrank != p-1) { - res = NBC_Sched_recv((char*)offset, true, thiscount, datatype, rpeer, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - res = NBC_Sched_barrier(schedule); - /* root reduces into receivebuf */ - if(vrank == 0) { - res = NBC_Sched_op((char*)recvbuf+offset, false, (char*)sendbuf+offset, false, (char*)offset, true, thiscount, datatype, op, schedule); - } else { - res = NBC_Sched_op((char*)offset, true, (char*)sendbuf+offset, false, (char*)offset, true, thiscount, datatype, op, schedule); - } - res = NBC_Sched_barrier(schedule); - } - - /* root does not send */ - if(vrank != 0) { - /* rank p-1 has to send out of sendbuffer :) */ - if(vrank == p-1) { - res = NBC_Sched_send((char*)sendbuf+offset, false, thiscount, datatype, speer, schedule); - } else { - res = NBC_Sched_send((char*)offset, true, thiscount, datatype, speer, schedule); - } - if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - /* this barrier here seems awkward but isn't!!!! */ - res = NBC_Sched_barrier(schedule); - } - } - - return NBC_OK; -} diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c deleted file mode 100644 index e55889f314..0000000000 --- a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c +++ /dev/null @@ -1,132 +0,0 @@ -#include "nbc.h" - -/* an reduce_csttare schedule can not be cached easily because the contents - * ot the recvcounts array may change, so a comparison of the address - * would not be sufficient ... we simply do not cache it */ - -/* binomial reduce to rank 0 followed by a linear scatter ... - * - * Algorithm: - * pairwise exchange - * round r: - * grp = rank % 2^r - * if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value - * if grp == 1: send to rank - 2^(r-1) and exit function - * - * do this for R=log_2(p) rounds - * - */ -int NBC_Ireduce_scatter(void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, NBC_Handle* handle) { - int peer, rank, maxr, p, r, res, count, offset, firstred; - MPI_Aint ext; - char *redbuf, *sbuf, inplace; - NBC_Schedule *schedule; - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - schedule = malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - maxr = (int)ceil((log(p)/LOG2)); - - count = 0; - for(r=0;rtmpbuf = malloc(ext*count*2); - if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } - - redbuf = ((char*)handle->tmpbuf)+(ext*count); - - /* copy data to redbuf if we only have a single node */ - if((p==1) && !inplace) { - res = NBC_Copy(sendbuf, count, datatype, redbuf, count, datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - - firstred = 1; - for(r=1; r<=maxr; r++) { - if((rank % (1<tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - if(firstred) { - /* take reduce data from the sendbuf in the first round -> save copy */ - res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, sendbuf, false, 0, true, count, datatype, op, schedule); - firstred = 0; - } else { - /* perform the reduce in my local buffer */ - res = NBC_Sched_op(redbuf-(unsigned long)handle->tmpbuf, true, redbuf-(unsigned long)handle->tmpbuf, true, 0, true, count, datatype, op, schedule); - } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - } - } else { - /* we have to send this round */ - peer = rank - (1<<(r-1)); - if(firstred) { - /* we have to send the senbuf */ - res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule); - } else { - /* we send an already reduced value from redbuf */ - res = NBC_Sched_send(redbuf-(unsigned long)handle->tmpbuf, true, count, datatype, peer, schedule); - } - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - /* leave the game */ - break; - } - } - - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - - /* rank 0 is root and sends - all others receive */ - if(rank != 0) { - res = NBC_Sched_recv(recvbuf, false, recvcounts[rank], datatype, 0, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } - - if(rank == 0) { - offset = 0; - for(r=1;rtmpbuf, true, recvcounts[r], datatype, r, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - res = NBC_Sched_copy(redbuf-(unsigned long)handle->tmpbuf, true, recvcounts[0], datatype, recvbuf, false, recvcounts[0], datatype, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_copy() (%i)\n", res); return res; } - } - - /*NBC_PRINT_SCHED(*schedule);*/ - - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } - - /* tmpbuf is freed with the handle */ - return NBC_OK; -} - - diff --git a/ompi/mca/coll/libnbc/nbc_iscan.c b/ompi/mca/coll/libnbc/nbc_iscan.c deleted file mode 100644 index 9e2812f45b..0000000000 --- a/ompi/mca/coll/libnbc/nbc_iscan.c +++ /dev/null @@ -1,120 +0,0 @@ -#include "nbc.h" - -#ifdef NBC_CACHE_SCHEDULE -/* tree comparison function for schedule cache */ -int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && - (a->recvbuf == b->recvbuf) && - (a->count == b->count) && - (a->datatype == b->datatype) && - (a->op == b->op) ) { - return 0; - } - if( a->sendbuf < b->sendbuf ) { - return -1; - } - return +1; -} -#endif - -/* linear iscan - * working principle: - * 1. each node (but node 0) receives from left neigbor - * 2. performs op - * 3. all but rank p-1 do sends to it's right neigbor and exits - * - */ -int NBC_Iscan(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, NBC_Handle* handle) { - int rank, p, res; - MPI_Aint ext; - NBC_Schedule *schedule; -#ifdef NBC_CACHE_SCHEDULE - NBC_Scan_args *args, *found, search; -#endif - char inplace; - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - handle->tmpbuf = malloc(ext*count); - if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; } - - if((rank == 0) && !inplace) { - /* copy data to receivebuf */ - res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - -#ifdef NBC_CACHE_SCHEDULE - /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.recvbuf=recvbuf; - search.count=count; - search.datatype=datatype; - search.op=op; - found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_SCAN], &search); - if(found == NULL) { -#endif - schedule = malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - if(rank != 0) { - res = NBC_Sched_recv(0, true, count, datatype, rank-1, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - /* we have to wait until we have the data */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - /* perform the reduce in my local buffer */ - res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( */ - res = NBC_Sched_barrier(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; } - } - if(rank != p-1) { - res = NBC_Sched_send(recvbuf, false, count, datatype, rank+1, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; } - } - - res = NBC_Sched_commit(schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; } - -#ifdef NBC_CACHE_SCHEDULE - /* save schedule to tree */ - args = malloc(sizeof(NBC_Alltoall_args)); - args->sendbuf=sendbuf; - args->recvbuf=recvbuf; - args->count=count; - args->datatype=datatype; - args->op=op; - args->schedule=schedule; - res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_SCAN], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_SCAN] > SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_SCAN], &handle->comminfo->NBC_Dict_size[NBC_SCAN]); - } - } else { - /* found schedule */ - schedule=found->schedule; - } -#endif - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; } - - /* tmpbuf is freed with the handle */ - return NBC_OK; -} diff --git a/ompi/mca/coll/libnbc/nbc_iscatter.c b/ompi/mca/coll/libnbc/nbc_iscatter.c deleted file mode 100644 index 3954917d5c..0000000000 --- a/ompi/mca/coll/libnbc/nbc_iscatter.c +++ /dev/null @@ -1,117 +0,0 @@ -#include "nbc.h" - -#ifdef NBC_CACHE_SCHEDULE -/* tree comparison function for schedule cache */ -int NBC_Scatter_args_compare(NBC_Scatter_args *a, NBC_Scatter_args *b, void *param) { - - if( (a->sendbuf == b->sendbuf) && - (a->sendcount == b->sendcount) && - (a->sendtype == b->sendtype) && - (a->recvbuf == b->recvbuf) && - (a->recvcount == b->recvcount) && - (a->recvtype == b->recvtype) && - (a->root == b->root) ) { - return 0; - } - if( a->sendbuf < b->sendbuf ) { - return -1; - } - return +1; -} -#endif - -/* simple linear MPI_Iscatter */ -int NBC_Iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle) { - int rank, p, res, i; - MPI_Aint sndext; - NBC_Schedule *schedule; - char *sbuf, inplace; -#ifdef NBC_CACHE_SCHEDULE - NBC_Scatter_args *args, *found, search; -#endif - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - handle->tmpbuf=NULL; - - if((rank == root) && (!inplace)) { - sbuf = ((char *)sendbuf) + (rank*sendcount*sndext); - /* if I am the root - just copy the message (not for MPI_IN_PLACE) */ - res = NBC_Copy(sbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); - if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; } - } - -#ifdef NBC_CACHE_SCHEDULE - /* search schedule in communicator specific tree */ - search.sendbuf=sendbuf; - search.sendcount=sendcount; - search.sendtype=sendtype; - search.recvbuf=recvbuf; - search.recvcount=recvcount; - search.recvtype=recvtype; - search.root=root; - found = hb_tree_search(handle->comminfo->NBC_Dict[NBC_SCATTER], &search); - if(found == NULL) { -#endif - schedule = malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - /* receive from root */ - if(rank != root) { - /* recv msg from root */ - res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } else { - for(i=0;isendbuf=sendbuf; - args->sendcount=sendcount; - args->sendtype=sendtype; - args->recvbuf=recvbuf; - args->recvcount=recvcount; - args->recvtype=recvtype; - args->root=root; - args->schedule=schedule; - res = hb_tree_insert (handle->comminfo->NBC_Dict[NBC_SCATTER], args, args, 0); - if(res != 0) printf("error in dict_insert() (%i)\n", res); - /* increase number of elements for A2A */ - if(++handle->comminfo->NBC_Dict_size[NBC_SCATTER] > SCHED_DICT_UPPER) { - NBC_SchedCache_dictwipe(handle->comminfo->NBC_Dict[NBC_SCATTER], &handle->comminfo->NBC_Dict_size[NBC_SCATTER]); - } - } else { - /* found schedule */ - schedule=found->schedule; - } -#endif - - - res = NBC_Start(handle, schedule); - if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; } - - return NBC_OK; -} diff --git a/ompi/mca/coll/libnbc/nbc_iscatterv.c b/ompi/mca/coll/libnbc/nbc_iscatterv.c deleted file mode 100644 index 9bfcf1f9fd..0000000000 --- a/ompi/mca/coll/libnbc/nbc_iscatterv.c +++ /dev/null @@ -1,62 +0,0 @@ -#include "nbc.h" - -/* an scatterv schedule can not be cached easily because the contents - * ot the recvcounts array may change, so a comparison of the address - * would not be sufficient ... we simply do not cache it */ - -/* simple linear MPI_Iscatterv */ -int NBC_Iscatterv(void* sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, NBC_Handle* handle) { - int rank, p, res, i; - MPI_Aint sndext; - NBC_Schedule *schedule; - char *sbuf, inplace; - - NBC_IN_PLACE(sendbuf, recvbuf, inplace); - - res = NBC_Init_handle(handle, comm); - if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(sendtype, &sndext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - - schedule = malloc(sizeof(NBC_Schedule)); - if (NULL == schedule) { printf("Error in malloc()\n"); return res; } - - handle->tmpbuf=NULL; - - res = NBC_Sched_create(schedule); - if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; } - - /* receive from root */ - if(rank != root) { - /* recv msg from root */ - res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule); - if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; } - } else { - for(i=0;i *(((int*)buf2) + i)) *(((int*)buf3) + i) = *(((int*)buf2) + i); else *(((int*)buf3) + i) = *(((int*)buf1) + i); - } - } else if(op == MPI_MAX) { - for(i=0; i *(((long*)buf2) + i)) *(((long*)buf3) + i) = *(((long*)buf2) + i); else *(((long*)buf3) + i) = *(((long*)buf1) + i); - } - } else if(op == MPI_MAX) { - for(i=0; i *(((short*)buf2) + i)) *(((short*)buf3) + i) = *(((short*)buf2) + i); else *(((short*)buf3) + i) = *(((short*)buf1) + i); - } - } else if(op == MPI_MAX) { - for(i=0; i *(((unsigned int*)buf2) + i)) *(((unsigned int*)buf3) + i) = *(((unsigned int*)buf2) + i); else *(((unsigned int*)buf3) + i) = *(((unsigned int*)buf1) + i); - } - } else if(op == MPI_MAX) { - for(i=0; i *(((unsigned long*)buf2) + i)) *(((unsigned long*)buf3) + i) = *(((unsigned long*)buf2) + i); else *(((unsigned long*)buf3) + i) = *(((unsigned long*)buf1) + i); - } - } else if(op == MPI_MAX) { - for(i=0; i *(((unsigned short*)buf2) + i)) *(((unsigned short*)buf3) + i) = *(((unsigned short*)buf2) + i); else *(((unsigned short*)buf3) + i) = *(((unsigned short*)buf1) + i); - } - } else if(op == MPI_MAX) { - for(i=0; i *(((float*)buf2) + i)) *(((float*)buf3) + i) = *(((float*)buf2) + i); else *(((float*)buf3) + i) = *(((float*)buf1) + i); - } - } else if(op == MPI_MAX) { - for(i=0; i *(((double*)buf2) + i)) *(((double*)buf3) + i) = *(((double*)buf2) + i); else *(((double*)buf3) + i) = *(((double*)buf1) + i); - } - } else if(op == MPI_MAX) { - for(i=0; i *(((long double*)buf2) + i)) *(((long double*)buf3) + i) = *(((long double*)buf2) + i); else *(((long double*)buf3) + i) = *(((long double*)buf1) + i); - } - } else if(op == MPI_MAX) { - for(i=0; ival < ptr2->val) { - ptr3->val = ptr2->val; ptr3->rank = ptr2->rank; - } else { - ptr3->val = ptr1->val; ptr3->rank = ptr1->rank; - } - } - } else if(op == MPI_MINLOC) { - for(i=0; ival > ptr2->val) { - ptr3->val = ptr2->val; ptr3->rank = ptr2->rank; - } else { - ptr3->val = ptr1->val; ptr3->rank = ptr1->rank; - } - } - } else return NBC_OP_NOT_SUPPORTED; - } else if(type == MPI_DOUBLE_INT) { - if(op == MPI_MAXLOC) { - for(i=0; ival < ptr2->val) { - ptr3->val = ptr2->val; ptr3->rank = ptr2->rank; - } else { - ptr3->val = ptr1->val; ptr3->rank = ptr1->rank; - } - } - } else if(op == MPI_MINLOC) { - for(i=0; ival > ptr2->val) { - ptr3->val = ptr2->val; ptr3->rank = ptr2->rank; - } else { - ptr3->val = ptr1->val; ptr3->rank = ptr1->rank; - } - } - } else return NBC_OP_NOT_SUPPORTED; - } else if(type == MPI_LONG_INT) { - if(op == MPI_MAXLOC) { - for(i=0; ival < ptr2->val) { - ptr3->val = ptr2->val; ptr3->rank = ptr2->rank; - } else { - ptr3->val = ptr1->val; ptr3->rank = ptr1->rank; - } - } - } else if(op == MPI_MINLOC) { - for(i=0; ival > ptr2->val) { - ptr3->val = ptr2->val; ptr3->rank = ptr2->rank; - } else { - ptr3->val = ptr1->val; ptr3->rank = ptr1->rank; - } - } - } else return NBC_OP_NOT_SUPPORTED; - } else if(type == MPI_2INT) { - if(op == MPI_MAXLOC) { - for(i=0; ival < ptr2->val) { - ptr3->val = ptr2->val; ptr3->rank = ptr2->rank; - } else { - ptr3->val = ptr1->val; ptr3->rank = ptr1->rank; - } - } - } else if(op == MPI_MINLOC) { - for(i=0; ival > ptr2->val) { - ptr3->val = ptr2->val; ptr3->rank = ptr2->rank; - } else { - ptr3->val = ptr1->val; ptr3->rank = ptr1->rank; - } - } - } else return NBC_OP_NOT_SUPPORTED; - } else if(type == MPI_SHORT_INT) { - if(op == MPI_MAXLOC) { - for(i=0; ival < ptr2->val) { - ptr3->val = ptr2->val; ptr3->rank = ptr2->rank; - } else { - ptr3->val = ptr1->val; ptr3->rank = ptr1->rank; - } - } - } else if(op == MPI_MINLOC) { - for(i=0; ival > ptr2->val) { - ptr3->val = ptr2->val; ptr3->rank = ptr2->rank; - } else { - ptr3->val = ptr1->val; ptr3->rank = ptr1->rank; - } - } - } else return NBC_OP_NOT_SUPPORTED; - } else if(type == MPI_LONG_DOUBLE_INT) { - if(op == MPI_MAXLOC) { - for(i=0; ival < ptr2->val) { - ptr3->val = ptr2->val; ptr3->rank = ptr2->rank; - } else { - ptr3->val = ptr1->val; ptr3->rank = ptr1->rank; - } - } - } else if(op == MPI_MINLOC) { - for(i=0; ival > ptr2->val) { - ptr3->val = ptr2->val; ptr3->rank = ptr2->rank; - } else { - ptr3->val = ptr1->val; ptr3->rank = ptr1->rank; - } - } - } else return NBC_OP_NOT_SUPPORTED; - } else return NBC_DATATYPE_NOT_SUPPORTED; - - return NBC_OK; -} - diff --git a/ompi/mca/coll/libnbc/nbc_op.c.m4 b/ompi/mca/coll/libnbc/nbc_op.c.m4 deleted file mode 100644 index b7e59dc3c9..0000000000 --- a/ompi/mca/coll/libnbc/nbc_op.c.m4 +++ /dev/null @@ -1,244 +0,0 @@ -dnl/* -dnl * Copyright (c) 2006 The Trustees of Indiana University and Indiana -dnl * University Research and Technology -dnl * Corporation. All rights reserved. -dnl * Copyright (c) 2006 The Technical University of Chemnitz. All -dnl * rights reserved. -dnl */ -dnl -dnl this m4 code generate all MPI intrinsic operations -dnl every macro is prefixed with m4_ to retain clarity (this means that -dnl everything prefixed with m4_ will be replaced by m4!) -dnl -dnl -dnl -dnl ########## define all MPI intrinsic Operations and appropriate C code ############# -define(m4_OP_MPI_MIN, `if(m4_ARG1$1 > m4_ARG2$1) m4_ARG3$1 = m4_ARG2$1; else m4_ARG3$1 = m4_ARG1$1;')dnl -define(m4_OP_MPI_MAX, `if(m4_ARG1$1 < m4_ARG2$1) m4_ARG3$1 = m4_ARG2$1; else m4_ARG3$1 = m4_ARG1$1;')dnl -define(m4_OP_MPI_SUM, `m4_ARG3$1 = m4_ARG1$1 + m4_ARG2$1;')dnl -define(m4_OP_MPI_PROD, `m4_ARG3$1 = m4_ARG1$1 * m4_ARG2$1;')dnl -define(m4_OP_MPI_LAND, `m4_ARG3$1 = m4_ARG1$1 && m4_ARG2$1;')dnl -define(m4_OP_MPI_BAND, `m4_ARG3$1 = m4_ARG1$1 & m4_ARG2$1;')dnl -define(m4_OP_MPI_LOR, `m4_ARG3$1 = m4_ARG1$1 || m4_ARG2$1;')dnl -define(m4_OP_MPI_BOR, `m4_ARG3$1 = m4_ARG1$1 | m4_ARG2$1;')dnl -define(m4_OP_MPI_LXOR, `m4_ARG3$1 = ((m4_ARG1$1 ? 1 : 0) ^ (m4_ARG2$1 ? 1 : 0));')dnl -define(m4_OP_MPI_BXOR, `m4_ARG3$1 = ((m4_ARG1$1) ^ (m4_ARG2$1));')dnl -define(m4_OP_MPI_MINLOC, `if(m4_ARG1$1_VAL > m4_ARG2$1_VAL) { - m4_ARG3$1_VAL = m4_ARG2$1_VAL; m4_ARG3$1_RANK = m4_ARG2$1_RANK; - } else { - m4_ARG3$1_VAL = m4_ARG1$1_VAL; m4_ARG3$1_RANK = m4_ARG1$1_RANK; - }')dnl -define(m4_OP_MPI_MAXLOC, `if(m4_ARG1$1_VAL < m4_ARG2$1_VAL) { - m4_ARG3$1_VAL = m4_ARG2$1_VAL; m4_ARG3$1_RANK = m4_ARG2$1_RANK; - } else { - m4_ARG3$1_VAL = m4_ARG1$1_VAL; m4_ARG3$1_RANK = m4_ARG1$1_RANK; - }')dnl -dnl -dnl ########## define helper macros ################# -dnl ########## loop-unrolled version -> slows it down :-( ###### -dnl define(m4_IF, `if(op == $1) { -dnl /* loop unrolling - 4 */ -dnl for(i=0; ival)dnl -define(m4_ARG2_VAL, ptr2->val)dnl -define(m4_ARG3_VAL, ptr3->val)dnl -define(m4_ARG1_RANK, ptr1->rank)dnl -define(m4_ARG2_RANK, ptr2->rank)dnl -define(m4_ARG3_RANK, ptr3->rank)dnl - m4_OP_$1 - } - }')dnl -dnl ########################################################## -define(m4_TYPE, `if(type == $1) { - m4_OPTYPE_$1($1) - }')dnl -dnl ########## define possible operations for each type -dnl -dnl -dnl ####### MPI_INT ######## -define(m4_OPTYPE_MPI_INT, `define(m4_CTYPE_$1, `int')dnl -m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl -m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl -m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl -m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_LONG ######## -define(m4_OPTYPE_MPI_LONG, `define(m4_CTYPE_$1, `long')dnl -m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl -m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl -m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl -m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_SHORT ######## -define(m4_OPTYPE_MPI_SHORT, `define(m4_CTYPE_$1, `short')dnl -m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl -m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl -m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl -m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_UNSIGNED ######## -define(m4_OPTYPE_MPI_UNSIGNED, `define(m4_CTYPE_$1, `unsigned int')dnl -m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl -m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl -m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl -m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_UNSIGNED_LONG ######## -define(m4_OPTYPE_MPI_UNSIGNED_LONG, `define(m4_CTYPE_$1, `unsigned long')dnl -m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl -m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl -m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl -m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_UNSIGNED_SHORT ######## -define(m4_OPTYPE_MPI_UNSIGNED_SHORT, `define(m4_CTYPE_$1, `unsigned short')dnl -m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl -m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl -m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl -m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_FLOAT ######## -define(m4_OPTYPE_MPI_FLOAT, `define(m4_CTYPE_$1, `float')dnl -m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl -m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_DOUBLE ######## -define(m4_OPTYPE_MPI_DOUBLE, `define(m4_CTYPE_$1, `double')dnl -m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl -m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_LONG_DOUBLE ######## -define(m4_OPTYPE_MPI_LONG_DOUBLE, `define(m4_CTYPE_$1, `long double')dnl -m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl -m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_BYTE ######## -define(m4_OPTYPE_MPI_BYTE, `define(m4_CTYPE_$1, `char')dnl -m4_IF(MPI_BAND, $1) else m4_IF(MPI_BOR, $1) else dnl -m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_FLOAT_INT ######## -define(m4_OPTYPE_MPI_FLOAT_INT, `define(m4_CTYPE1_$1, `float')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `float_int')dnl -m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_DOUBLE_INT ######## -define(m4_OPTYPE_MPI_DOUBLE_INT, `define(m4_CTYPE1_$1, `double')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `double_int')dnl -m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_LONG_INT ######## -define(m4_OPTYPE_MPI_LONG_INT, `define(m4_CTYPE1_$1, `long')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `long_int')dnl -m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_2INT ######## -define(m4_OPTYPE_MPI_2INT, `define(m4_CTYPE1_$1, `int')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `int_int')dnl -m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_SHORT_INT ######## -define(m4_OPTYPE_MPI_SHORT_INT, `define(m4_CTYPE1_$1, `short')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `short_int')dnl -m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### MPI_LONG_DOUBLE_INT ######## -define(m4_OPTYPE_MPI_LONG_DOUBLE_INT, `define(m4_CTYPE1_$1, `long double')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `long_double_int')dnl -m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl -dnl -dnl ####### begin the real program :-) ######### -dnl -#include "nbc.h" - -/****************** THIS FILE is automatically generated ********************* - * changes will be deleted at the next generation of this file - see nbc_op.c.m4 */ - -int NBC_Operation(void *buf3, void *buf1, void *buf2, MPI_Op op, MPI_Datatype type, int count) { - int i; - - m4_TYPE(MPI_INT) else dnl -m4_TYPE(MPI_LONG) else dnl -m4_TYPE(MPI_SHORT) else dnl -m4_TYPE(MPI_UNSIGNED) else dnl -m4_TYPE(MPI_UNSIGNED_LONG) else dnl -m4_TYPE(MPI_UNSIGNED_SHORT) else dnl -m4_TYPE(MPI_FLOAT) else dnl -m4_TYPE(MPI_DOUBLE) else dnl -m4_TYPE(MPI_LONG_DOUBLE) else dnl -m4_TYPE(MPI_BYTE) else dnl -m4_TYPE(MPI_FLOAT_INT) else dnl -m4_TYPE(MPI_DOUBLE_INT) else dnl -m4_TYPE(MPI_LONG_INT) else dnl -m4_TYPE(MPI_2INT) else dnl -m4_TYPE(MPI_SHORT_INT) else dnl -m4_TYPE(MPI_LONG_DOUBLE_INT) else dnl -return NBC_DATATYPE_NOT_SUPPORTED; - - return NBC_OK; -} - diff --git a/ompi/mca/coll/libnbc/ompi_component.c b/ompi/mca/coll/libnbc/ompi_component.c deleted file mode 100644 index c786968710..0000000000 --- a/ompi/mca/coll/libnbc/ompi_component.c +++ /dev/null @@ -1,447 +0,0 @@ -#include -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "ompi/datatype/datatype.h" -#include "ompi_config.h" -#include "mpi.h" -#include "ompi/communicator/communicator.h" -#include "opal/mca/base/mca_base_param.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/coll/base/base.h" - -#include "ompi_component.h" - -#include "nbc.h" - -int mca_coll_libnbc_allgather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Iallgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; - -} - -int mca_coll_libnbc_allgatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void * rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Iallgatherv(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; - -} - -int mca_coll_libnbc_allreduce_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Iallreduce(sbuf, rbuf, count, dtype, op, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; -} - -int mca_coll_libnbc_alltoall_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Ialltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; -} - -int mca_coll_libnbc_alltoallv_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Ialltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; - -} - -int mca_coll_libnbc_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, - struct ompi_communicator_t *comm) -{ - - /* not implemented in libnbc yet ... - NBC_Handle handle; - int res; - - res = NBC_Iallgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; - */ - - return 0; -} - -int mca_coll_libnbc_barrier_intra(struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - /*printf("calling barrier ...\n");*/ - res = NBC_Ibarrier(comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; -} - -int mca_coll_libnbc_bcast_intra(void *buff, int count, - struct ompi_datatype_t *datatype, int root, - struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Ibcast(buff, count, datatype, root, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; - -} - -int mca_coll_libnbc_exscan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm) -{ - /* not implemented yet ... - NBC_Handle handle; - int res; - - res = NBC_Iallgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; - */ - return 0; - -} - -int mca_coll_libnbc_gather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Igather(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; -} - -int mca_coll_libnbc_gatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, int root, - struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Igatherv(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, root, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; - -} - -int mca_coll_libnbc_reduce_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Ireduce(sbuf, rbuf, count, dtype, op, root, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; - -} - -int mca_coll_libnbc_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Ireduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; - -} - -int mca_coll_libnbc_scan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Iscan(sbuf, rbuf, count, dtype, op, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; - -} - -int mca_coll_libnbc_scatter_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Iscatter(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; - -} - -int mca_coll_libnbc_scatterv_intra(void *sbuf, int *scounts, - int *disps, struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, int root, - struct ompi_communicator_t *comm) -{ - NBC_Handle handle; - int res; - - res = NBC_Iscatterv(sbuf, scounts, disps, sdtype, rbuf, rcount, rdtype, root, comm, &handle); - if(res != NBC_OK) return res; - res = NBC_Wait(&handle); - return res; - -} - - - - - - - - - - - - - - - - - - - - -/* - * Public string showing the coll ompi_libnbc component version number - */ -const char *mca_coll_libnbc_component_version_string = - "Open MPI libnbc collective MCA component version " OMPI_VERSION; - -/* - * Global variable - */ -int mca_coll_libnbc_priority_param = -1; - -/* - * Local function - */ -static int libnbc_open(void); - - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -const mca_coll_base_component_1_0_0_t mca_coll_libnbc_component = { - - /* First, the mca_component_t struct containing meta information - about the component itlibnbc */ - - { - /* Indicate that we are a coll v1.0.0 component (which also - implies a specific MCA version) */ - - MCA_COLL_BASE_VERSION_1_0_0, - - /* Component name and version */ - - "libnbc", - OMPI_MAJOR_VERSION, - OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION, - - /* Component open and close functions */ - - libnbc_open, - NULL - }, - - /* Next the MCA v1.0.0 component meta data */ - - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - /* Initialization / querying functions */ - - mca_coll_libnbc_init_query, - mca_coll_libnbc_comm_query, - NULL, -}; - - -static int libnbc_open(void) -{ - /* We'll always be picked if there's only one process in the - communicator */ - - mca_coll_libnbc_priority_param = - mca_base_param_register_int("coll", "libnbc", "priority", NULL, 1); - - return OMPI_SUCCESS; -} - - -/* - * Module - */ -static const mca_coll_base_module_1_0_0_t module = { - - /* Initialization / finalization functions */ - - mca_coll_libnbc_module_init, - mca_coll_libnbc_module_finalize, - - /* Collective function pointers */ - - mca_coll_libnbc_allgather_intra, - mca_coll_libnbc_allgatherv_intra, - mca_coll_libnbc_allreduce_intra, - mca_coll_libnbc_alltoall_intra, - mca_coll_libnbc_alltoallv_intra, - NULL, /* not implemented yet - mca_coll_libnbc_alltoallw_intra, */ - mca_coll_libnbc_barrier_intra, - mca_coll_libnbc_bcast_intra, - NULL, /* not implemented yet - mca_coll_libnbc_exscan_intra, */ - mca_coll_libnbc_gather_intra, - mca_coll_libnbc_gatherv_intra, - mca_coll_libnbc_reduce_intra, - mca_coll_libnbc_reduce_scatter_intra, - mca_coll_libnbc_scan_intra, - mca_coll_libnbc_scatter_intra, - mca_coll_libnbc_scatterv_intra -}; - - -/* - * Initial query function that is invoked during MPI_INIT, allowing - * this module to indicate what level of thread support it provides. - */ -int mca_coll_libnbc_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* Nothing to do */ - - return OMPI_SUCCESS; -} - - -/* - * Invoked when there's a new communicator that has been created. - * Look at the communicator and decide which set of functions and - * priority we want to return. - */ -const mca_coll_base_module_1_0_0_t * -mca_coll_libnbc_comm_query(struct ompi_communicator_t *comm, int *priority, - struct mca_coll_base_comm_t **data) -{ - /* We only work on intracommunicators */ - - if (!OMPI_COMM_IS_INTER(comm)) { - if (OMPI_SUCCESS != mca_base_param_lookup_int(mca_coll_libnbc_priority_param, priority)) { - return NULL; - } - /* printf("returning prio: %i\n", *priority); */ - - return &module; - } - - return NULL; -} - - -/* - * Init module on the communicator - */ -const struct mca_coll_base_module_1_0_0_t* mca_coll_libnbc_module_init(struct ompi_communicator_t *comm) -{ - - comm->c_coll_selected_data = (void*)NBC_Init_comm(comm); - if(NULL == comm->c_coll_selected_data) return NULL; - /* printf("communicator initialized comminfo: %lu:)\n", (unsigned long)comm->c_coll_selected_data); */ - - return &module; -} - - -/* - * Finalize module on the communicator - */ -int mca_coll_libnbc_module_finalize(struct ompi_communicator_t *comm) -{ - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/coll/libnbc/ompi_component.h b/ompi/mca/coll/libnbc/ompi_component.h deleted file mode 100644 index 1805675ec4..0000000000 --- a/ompi/mca/coll/libnbc/ompi_component.h +++ /dev/null @@ -1,117 +0,0 @@ -#ifndef MCA_COLL_libnbc_EXPORT_H -#define MCA_COLL_libnbc_EXPORT_H - -#include "ompi_config.h" - -#include "mpi.h" -#include "opal/mca/mca.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/request/request.h" -#include "ompi/mca/pml/pml.h" - -#include "nbc.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Globally exported variable - */ - -extern const mca_coll_base_component_1_0_0_t mca_coll_libnbc_component; -extern int mca_coll_libnbc_priority_param; - - -/* - * coll API functions - */ - - - /* API functions */ - - int mca_coll_libnbc_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - const struct mca_coll_base_module_1_0_0_t * - mca_coll_libnbc_comm_query(struct ompi_communicator_t *comm, int *priority, - struct mca_coll_base_comm_t **data); - - const struct mca_coll_base_module_1_0_0_t * - mca_coll_libnbc_module_init(struct ompi_communicator_t *comm); - int mca_coll_libnbc_module_finalize(struct ompi_communicator_t *comm); - - int mca_coll_libnbc_allgather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm); - int mca_coll_libnbc_allgatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void * rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm); - int mca_coll_libnbc_allreduce_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm); - int mca_coll_libnbc_alltoall_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm); - int mca_coll_libnbc_alltoallv_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm); - int mca_coll_libnbc_alltoallw_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t **sdtypes, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t **rdtypes, - struct ompi_communicator_t *comm); - int mca_coll_libnbc_barrier_intra(struct ompi_communicator_t *comm); - int mca_coll_libnbc_bcast_intra(void *buff, int count, - struct ompi_datatype_t *datatype, - int root, - struct ompi_communicator_t *comm); - int mca_coll_libnbc_exscan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm); - int mca_coll_libnbc_gather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, - int root, struct ompi_communicator_t *comm); - int mca_coll_libnbc_gatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, int root, - struct ompi_communicator_t *comm); - int mca_coll_libnbc_reduce_intra(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, - struct ompi_communicator_t *comm); - int mca_coll_libnbc_reduce_scatter_intra(void *sbuf, void *rbuf, - int *rcounts, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm); - int mca_coll_libnbc_scan_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm); - int mca_coll_libnbc_scatter_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, - int root, struct ompi_communicator_t *comm); - int mca_coll_libnbc_scatterv_intra(void *sbuf, int *scounts, int *disps, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, int root, - struct ompi_communicator_t *comm); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif /* MCA_COLL_libnbc_EXPORT_H */