1
1

BML (BTL Management Layer). Allows BTLs to be used outside of the PML. See
bml.h and PML-OB1 for usage.

This commit was SVN r6815.
Этот коммит содержится в:
Galen Shipman 2005-08-12 02:41:14 +00:00
родитель 311efa5bcc
Коммит c3c83aa3e1
59 изменённых файлов: 2371 добавлений и 1010 удалений

Просмотреть файл

@ -143,6 +143,7 @@ libmpi_la_LIBADD = \
mca/io/base/libmca_io_base.la $(MCA_io_STATIC_LTLIBS) \
mca/mpool/base/libmca_mpool_base.la $(MCA_mpool_STATIC_LTLIBS) \
mca/pml/base/libmca_pml_base.la $(MCA_pml_STATIC_LTLIBS) \
mca/bml/base/libmca_bml_base.la $(MCA_bml_STATIC_LTLIBS) \
mca/ptl/base/libmca_ptl_base.la $(MCA_ptl_STATIC_LTLIBS) \
mca/topo/base/libmca_topo_base.la $(MCA_topo_STATIC_LTLIBS) \
mpi/c/libmpi_c.la $(c_mpi_lib) $(c_pmpi_lib) \

Просмотреть файл

@ -19,7 +19,8 @@ include $(top_srcdir)/config/Makefile.options
SUBDIRS = \
common \
allocator \
btl \
bml \
btl \
coll \
io \
mpool \

26
ompi/dynamic-mca/bml/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,26 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
include $(top_srcdir)/config/Makefile.options
SUBDIRS = $(MCA_bml_DSO_SUBDIRS)
DISTCLEANFILES = $(SUBDIRS)
# Every directory under here is a sym link to something in the main
# src/mca tree. Hence, we don't want to distribute anything under
# here.
DIST_SUBDIRS =

Просмотреть файл

@ -21,7 +21,8 @@ EXTRA_DIST = win_makefile
SUBDIRS = \
common \
allocator \
btl \
bml \
btl \
coll \
io \
mpool \

35
ompi/mca/bml/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,35 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
include $(top_srcdir)/config/Makefile.options
SUBDIRS = base $(MCA_bml_STATIC_SUBDIRS)
DIST_SUBDIRS = base $(MCA_bml_ALL_SUBDIRS)
# Source code files
headers = bml.h
nodist_headers = bml_direct_call.h
# Conditionally install the header files
if WANT_INSTALL_HEADERS
ompidir = $(includedir)/openmpi/ompi/mca/bml
dist_ompi_HEADERS = $(headers)
nodist_ompi_HEADERS = $(nodist_headers)
else
ompidir = $(includedir)
endif

53
ompi/mca/bml/base/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,53 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
include $(top_srcdir)/config/Makefile.options
noinst_LTLIBRARIES = libmca_bml_base.la
# For VPATH builds, have to specify where static-modules.h will be found
AM_CPPFLAGS = -I$(top_builddir)/src
# Source code files
headers = \
base.h \
bml_base_btl.h \
bml_base_endpoint.h
libmca_bml_base_la_SOURCES = \
$(headers) \
bml_base_btl.c \
bml_base_btl.h \
bml_base_endpoint.h \
bml_base_endpoint.c \
bml_base_init.c \
bml_base_close.c \
bml_base_open.c
# Conditionally install the header files
if WANT_INSTALL_HEADERS
ompidir = $(includedir)/openmpi/ompi/mca/bml/base
ompi_HEADERS = $(headers)
else
ompidir = $(includedir)
endif
distclean-local:
rm -f static-components.h

65
ompi/mca/bml/base/base.h Обычный файл
Просмотреть файл

@ -0,0 +1,65 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BML_BASE_H
#define MCA_BML_BASE_H
#include "ompi_config.h"
#include "mca/mca.h"
#include "mca/bml/bml.h"
/*
* Global functions for the BML
*/
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
struct mca_bml_base_selected_module_t {
opal_list_item_t super;
mca_bml_base_component_t *bml_component;
mca_bml_base_module_t *bml_module;
};
typedef struct mca_bml_base_selected_module_t mca_bml_base_selected_module_t;
OBJ_CLASS_DECLARATION(mca_bml_base_selected_module_t);
/*
* Global functions for MCA: overall BTL open and close
*/
OMPI_DECLSPEC int mca_bml_base_open(void);
OMPI_DECLSPEC int mca_bml_base_init(bool enable_progress_threads, bool enable_mpi_threads);
OMPI_DECLSPEC int mca_bml_base_close(void);
/*
* Globals
*/
OMPI_DECLSPEC extern int mca_bml_base_output;
OMPI_DECLSPEC extern mca_bml_base_component_t mca_bml_component;
OMPI_DECLSPEC extern opal_list_t mca_bml_base_components_available;
OMPI_DECLSPEC extern mca_bml_base_module_t mca_bml;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_BML_BASE_H */

62
ompi/mca/bml/base/bml_base_btl.c Обычный файл
Просмотреть файл

@ -0,0 +1,62 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "mca/bml/bml.h"
#include "bml_base_btl.h"
/* OBJ class constructor: bring a freshly created BTL array up in an
 * empty state -- no backing storage, zero entries, cursor at zero. */
static void mca_bml_base_btl_array_construct(mca_bml_base_btl_array_t* array)
{
    array->arr_reserve = 0;
    array->arr_size    = 0;
    array->arr_index   = 0;
    array->bml_btls    = NULL;
}
/* OBJ class destructor: release the array's backing storage.
 * free(NULL) is a no-op, so no guard is required. */
static void mca_bml_base_btl_array_destruct(mca_bml_base_btl_array_t* array)
{
    free(array->bml_btls);
}
OBJ_CLASS_INSTANCE(
mca_bml_base_btl_array_t,
opal_object_t,
mca_bml_base_btl_array_construct,
mca_bml_base_btl_array_destruct
);
/*
 * Grow the array's backing store to hold at least `size` entries.
 * Newly added slots are zero-initialized; existing entries are kept.
 *
 * @param array (IN)  array to grow
 * @param size  (IN)  requested capacity, in entries
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE on allocation failure
 *         (in which case the existing storage is left intact).
 */
int mca_bml_base_btl_array_reserve(mca_bml_base_btl_array_t* array, size_t size)
{
    size_t old_len = sizeof(mca_bml_base_btl_t)*array->arr_reserve;
    size_t new_len = sizeof(mca_bml_base_btl_t)*size;
    mca_bml_base_btl_t* tmp;

    if(old_len >= new_len)
        return OMPI_SUCCESS;

    /* realloc into a temporary: assigning realloc's result directly to
     * array->bml_btls would leak (and lose) the original allocation when
     * realloc fails and returns NULL */
    tmp = realloc(array->bml_btls, new_len);
    if(NULL == tmp)
        return OMPI_ERR_OUT_OF_RESOURCE;
    array->bml_btls = tmp;

    /* zero only the newly exposed tail */
    memset((unsigned char*)array->bml_btls + old_len, 0, new_len-old_len);
    array->arr_reserve = size;
    return OMPI_SUCCESS;
}

42
ompi/mca/bml/base/bml_base_btl.h Обычный файл
Просмотреть файл

@ -0,0 +1,42 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BML_BASE_BTL_H
#define MCA_BML_BASE_BTL_H
#include "ompi_config.h"
#include "mca/mca.h"
#include "mca/bml/bml.h"
#include "opal/util/output.h"
/*
* Global functions for the BML
*/
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_BML_BASE_H */

26
ompi/mca/bml/base/bml_base_close.c Обычный файл
Просмотреть файл

@ -0,0 +1,26 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "mca/btl/base/base.h"
#include "mca/bml/base/base.h"
/*
 * Close the BML framework.  The BML base currently holds no state of
 * its own, so closing reduces to closing the underlying BTL framework
 * and propagating its status.
 */
int mca_bml_base_close( void )
{
    return mca_btl_base_close();
}

54
ompi/mca/bml/base/bml_base_endpoint.c Обычный файл
Просмотреть файл

@ -0,0 +1,54 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "include/sys/atomic.h"
#include "mca/bml/bml.h"
#include "bml_base_endpoint.h"
#include "mca/pml/pml.h"
/* OBJ class constructor: initialize a BML endpoint to an empty,
 * unconnected state.  Invoked automatically via OBJ_NEW/OBJ_CONSTRUCT. */
static void mca_bml_base_endpoint_construct(mca_bml_base_endpoint_t* proc)
{
    proc->btl_proc = NULL;        /* back-pointer to ompi_proc_t, filled in later */
    proc->btl_rdma_offset = 0;
    proc->btl_flags = 0;
    /* lock guards concurrent access to the three BTL arrays below */
    OBJ_CONSTRUCT(&proc->btl_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&proc->btl_eager, mca_bml_base_btl_array_t); /* BTLs for first/eager fragments */
    OBJ_CONSTRUCT(&proc->btl_send, mca_bml_base_btl_array_t);  /* BTLs for remaining fragments */
    OBJ_CONSTRUCT(&proc->btl_rdma, mca_bml_base_btl_array_t);  /* RDMA-capable BTLs */
}
/* OBJ class destructor: tear down the lock and the per-endpoint BTL
 * arrays built by the constructor. */
static void mca_bml_base_endpoint_destruct(mca_bml_base_endpoint_t* proc)
{
    OBJ_DESTRUCT(&proc->btl_lock);
    OBJ_DESTRUCT(&proc->btl_eager);
    OBJ_DESTRUCT(&proc->btl_send);
    OBJ_DESTRUCT(&proc->btl_rdma);
}
OBJ_CLASS_INSTANCE(
mca_bml_base_endpoint_t,
opal_object_t,
mca_bml_base_endpoint_construct,
mca_bml_base_endpoint_destruct
);

45
ompi/mca/bml/base/bml_base_endpoint.h Обычный файл
Просмотреть файл

@ -0,0 +1,45 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BML_BASE_ENDPOINT_H
#define MCA_BML_BASE_ENDPOINT_H
#include "ompi_config.h"
#include "mca/mca.h"
#include "mca/bml/bml.h"
#include "opal/util/output.h"
#include "opal/threads/mutex.h"
#include "mca/pml/pml.h"
#include "bml_base_btl.h"
/*
* Global functions for the BML
*/
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_BML_BASE_H */

85
ompi/mca/bml/base/bml_base_init.c Обычный файл
Просмотреть файл

@ -0,0 +1,85 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "mca/bml/base/base.h"
#include "mca/base/base.h"
#include "mca/mca.h"
/* Output stream id for BML framework verbose messages (-1 until opened). */
int mca_bml_base_output = -1;

/* The globally selected BML module.  Starts as an all-NULL placeholder;
 * mca_bml_base_init() overwrites it with the highest-priority module
 * returned by a component's init function. */
OMPI_DECLSPEC mca_bml_base_module_t mca_bml = {
    NULL, /* bml_component */
    0, /* bml_eager_limit */
    0, /* bml_min_send_size */
    0, /* bml_max_send_size */
    0, /* bml_min_rdma_size */
    0, /* bml_max_rdma_size */
    NULL, /* bml_add_procs */
    NULL, /* bml_del_procs */
    NULL, /* bml_register */
    NULL, /* bml_finalize*/
    NULL /* bml_progress */
};

/* Copy of the component that produced the selected module. */
mca_bml_base_component_t mca_bml_component;
/*
 * Select the highest-priority BML component.
 *
 * Walks the components opened by mca_bml_base_open(), calls each
 * component's init function, and keeps the module reporting the best
 * priority.  The winner is copied into the globals mca_bml_component /
 * mca_bml, and all non-selected components are closed.
 *
 * @param enable_progress_threads (IN) whether a progress thread may run
 * @param enable_mpi_threads      (IN) whether MPI_THREAD_MULTIPLE is on
 * @return OMPI_SUCCESS, or the status of mca_base_components_close().
 */
int mca_bml_base_init( bool enable_progress_threads,
                       bool enable_mpi_threads ) {
    opal_list_item_t *item = NULL;
    mca_bml_base_component_t *component = NULL, *best_component = NULL;
    mca_bml_base_module_t *module = NULL, *best_module = NULL;
    int priority = 0, best_priority = -1;
    mca_base_component_list_item_t *cli = NULL;

    /* iterate over every available component */
    for (item = opal_list_get_first(&mca_bml_base_components_available);
         opal_list_get_end(&mca_bml_base_components_available) != item;
         item = opal_list_get_next(item)) {
        cli = (mca_base_component_list_item_t*) item;
        component = (mca_bml_base_component_t*) cli->cli_component;
        if(NULL == component->bml_init) {
            opal_output_verbose( 10, mca_bml_base_output,
                                 "select: no init function; ignoring component %s",
                                 component->bml_version.mca_component_name );
            continue;
        }
        /* let the component instantiate a module and report its priority */
        module = component->bml_init(&priority,
                                     enable_progress_threads,
                                     enable_mpi_threads );
        if(NULL == module) {
            continue;
        }
        /* keep the highest-priority module seen so far */
        if(priority > best_priority) {
            best_priority = priority;
            best_component = component;
            best_module = module;
        }
    }
    if(NULL == best_module) {
        /* NOTE(review): when nothing selects, mca_bml remains the all-NULL
         * placeholder yet OMPI_SUCCESS is returned -- confirm callers check
         * for an unselected BML before invoking its function pointers. */
        return OMPI_SUCCESS;
    }
    else {
        /* publish the winner in the framework globals ... */
        mca_bml_component = *best_component;
        mca_bml = *best_module;
        /* ... and close every component other than the selected one */
        return mca_base_components_close(mca_bml_base_output,
                                         &mca_bml_base_components_available,
                                         (mca_base_component_t*) best_component);
    }
}

17
ompi/mca/bml/base/bml_base_lookup.c Обычный файл
Просмотреть файл

@ -0,0 +1,17 @@
/*
 * Look up a selected BML module by its component name; returns NULL if
 * no module with that name is on the list.
 *
 * NOTE(review): this file appears to be a copy/paste from the mpool
 * base -- it iterates mca_bml_base_modules but casts the items to
 * mca_mpool_base_selected_module_t and reads mpool_* fields, and the
 * file lacks includes and the standard copyright header.  Verify the
 * intended list and item types before this is compiled in anywhere.
 */
mca_bml_base_module_t* mca_bml_base_module_lookup(const char* name)
{
    opal_list_item_t* item;
    /* linear scan of the selected-modules list */
    for (item = opal_list_get_first(&mca_bml_base_modules);
         item != opal_list_get_end(&mca_bml_base_modules);
         item = opal_list_get_next(item)) {
        mca_mpool_base_selected_module_t *mli =
            (mca_mpool_base_selected_module_t *) item;
        if(0 == strcmp(mli->mpool_component->mpool_version.mca_component_name,
                       name)) {
            return mli->mpool_module;
        }
    }
    return NULL;
}

39
ompi/mca/bml/base/bml_base_open.c Обычный файл
Просмотреть файл

@ -0,0 +1,39 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include "mca/bml/bml.h"
#include "mca/bml/base/base.h"
#include "mca/btl/base/base.h"
#include "ompi/mca/bml/base/static-components.h"
/* List of "bml" components discovered by mca_base_components_open(). */
opal_list_t mca_bml_base_components_available;

/*
 * Open the BML framework: open all available bml components (static
 * components come from static-components.h, DSOs from the component
 * path), then open the underlying BTL framework that the BML
 * multiplexes over.
 */
int mca_bml_base_open( void ) {
    if(OMPI_SUCCESS !=
       mca_base_components_open("bml", 0, mca_bml_base_static_components,
                                &mca_bml_base_components_available,
                                true)) {
        return OMPI_ERROR;
    }
    /* the BML layers on top of the BTLs, so they must be opened as well */
    return mca_btl_base_open();
}

10
ompi/mca/bml/base/static-components.h Обычный файл
Просмотреть файл

@ -0,0 +1,10 @@
/*
* $HEADER$
*/
extern const mca_base_component_t mca_bml_r2_component;
const mca_base_component_t *mca_bml_base_static_components[] = {
&mca_bml_r2_component,
NULL
};

545
ompi/mca/bml/bml.h Обычный файл
Просмотреть файл

@ -0,0 +1,545 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* BTL Management Layer (BML)
*
*/
#include "mca/mca.h"
#include "mca/btl/btl.h"
#include "mca/bml/base/bml_base_btl.h"
#include "mca/bml/base/bml_base_endpoint.h"
#ifndef MCA_BML_H
#define MCA_BML_H
#include "include/types.h"
#include "class/ompi_free_list.h"
#include "mca/pml/pml.h"
#include "mca/pml/ob1/pml_ob1_hdr.h"
/*
* BML types
*/
struct ompi_proc_t;
struct mca_bml_base_module_t;
struct mca_bml_base_endpoint_t;
struct mca_mpool_base_resources_t;
/*
* Cached set of information for each btl
* NOTE TO GALEN -- this replaces mca_pml_ob1_endpoint_t
*/
struct mca_bml_base_btl_t {
int btl_index; /**< index in endpoint array */
int btl_weight; /**< BTL weight for scheduling */
int btl_flags; /**< support for put/get? */
size_t btl_eager_limit; /**< BTL eager limit */
size_t btl_min_send_size; /**< BTL min send size */
size_t btl_max_send_size; /**< BTL max send size */
size_t btl_min_rdma_size; /**< BTL min rdma size */
size_t btl_max_rdma_size; /**< BTL max rdma size */
struct mca_btl_base_module_t *btl; /**< BTL module */
struct mca_btl_base_endpoint_t* btl_endpoint; /**< BTL addressing info */
struct mca_btl_base_descriptor_t* btl_cache;
/* BTL function table */
mca_btl_base_module_alloc_fn_t btl_alloc;
mca_btl_base_module_free_fn_t btl_free;
mca_btl_base_module_send_fn_t btl_send;
mca_btl_base_module_prepare_fn_t btl_prepare_src;
mca_btl_base_module_prepare_fn_t btl_prepare_dst;
mca_btl_base_module_put_fn_t btl_put;
mca_btl_base_module_get_fn_t btl_get;
mca_btl_base_component_progress_fn_t btl_progress;
};
typedef struct mca_bml_base_btl_t mca_bml_base_btl_t;
/**
* A dynamically growable array of mca_bml_base_btl_t instances.
* Maintains an index into the array that is used for round-robin
* scheduling across contents.
*/
struct mca_bml_base_btl_array_t {
opal_object_t super;
size_t arr_size; /**< number available */
size_t arr_reserve; /**< size of allocated btl_proc array */
size_t arr_index; /**< last used index*/
mca_bml_base_btl_t* bml_btls; /**< array of bml btl's */
};
typedef struct mca_bml_base_btl_array_t mca_bml_base_btl_array_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bml_base_btl_array_t);
/**
* If required, reallocate (grow) the array to the indicated size.
*
* @param array (IN)
* @param size (IN)
*/
int mca_bml_base_btl_array_reserve(mca_bml_base_btl_array_t*, size_t);
/* Return the number of entries currently in use in the array
 * (arr_size, not the allocated capacity arr_reserve). */
static inline size_t mca_bml_base_btl_array_get_size(mca_bml_base_btl_array_t* array)
{
    return array->arr_size;
}
/**
* Grow the array if required, and set the size.
*
* @param array (IN)
* @param size (IN)
*/
/*
 * Grow the array if required, and set the in-use size.
 *
 * @param array (IN)
 * @param size (IN)
 */
static inline void mca_bml_base_btl_array_set_size(mca_bml_base_btl_array_t* array, size_t size)
{
    /* Grow the backing store whenever the REQUESTED size exceeds the
     * current reservation.  (The original compared arr_size against
     * arr_reserve, which never triggers a grow and would allow
     * out-of-bounds access once arr_size is set past arr_reserve.) */
    if(size > array->arr_reserve)
        mca_bml_base_btl_array_reserve(array, size);
    array->arr_size = size;
}
/**
* Grow the array size by one and return the item at that index.
*
* @param array (IN)
*/
/*
 * Grow the in-use count by one and return the newly exposed slot.
 * In debug builds, reports and returns NULL when the array is full
 * (the caller must reserve capacity first).
 */
static inline mca_bml_base_btl_t* mca_bml_base_btl_array_insert(mca_bml_base_btl_array_t* array)
{
#if OMPI_ENABLE_DEBUG
    if(array->arr_size >= array->arr_reserve) {
        /* passing size_t to %d is undefined behavior -- cast to
         * unsigned long and print with %lu */
        opal_output(0, "mca_bml_base_btl_array_insert: invalid array index %lu >= %lu",
            (unsigned long)array->arr_size, (unsigned long)array->arr_reserve);
        return NULL;
    }
#endif
    return &array->bml_btls[array->arr_size++];
}
/**
* Return an array item at the specified index.
*
* @param array (IN)
* @param index (IN)
*/
/*
 * Return the array item at the specified index.  In debug builds,
 * reports and returns NULL on an out-of-range index.
 *
 * @param array (IN)
 * @param index (IN)
 */
static inline mca_bml_base_btl_t* mca_bml_base_btl_array_get_index(mca_bml_base_btl_array_t* array, size_t index)
{
#if OMPI_ENABLE_DEBUG
    if(index >= array->arr_size) {
        /* passing size_t to %d is undefined behavior -- cast to
         * unsigned long and print with %lu */
        opal_output(0, "mca_bml_base_btl_array_get_index: invalid array index %lu >= %lu",
            (unsigned long)index, (unsigned long)array->arr_size);
        return NULL;
    }
#endif
    return &array->bml_btls[index];
}
/**
* Return the next LRU index in the array.
*
* @param array (IN)
* @param index (IN)
*/
/*
 * Round-robin selection: hand back the entry at the current cursor,
 * then advance the cursor, wrapping to the start of the array.
 */
static inline mca_bml_base_btl_t* mca_bml_base_btl_array_get_next(mca_bml_base_btl_array_t* array)
{
    mca_bml_base_btl_t* chosen;
#if OMPI_ENABLE_DEBUG
    if(0 == array->arr_size) {
        opal_output(0, "mca_bml_base_btl_array_get_next: invalid array size");
        return 0;
    }
#endif
    chosen = &array->bml_btls[array->arr_index];
    array->arr_index += 1;
    if(array->arr_index == array->arr_size) {
        array->arr_index = 0;
    }
    return chosen;
}
/**
* Locate an element in the array
*
* @param array (IN)
* @param index (IN)
*/
/*
 * Linear search for the entry whose btl module pointer matches the
 * given BTL; NULL when no entry matches.
 */
static inline mca_bml_base_btl_t* mca_bml_base_btl_array_find(
    mca_bml_base_btl_array_t* array, struct mca_btl_base_module_t* btl)
{
    size_t idx;
    for(idx = 0; idx < array->arr_size; idx++) {
        mca_bml_base_btl_t* candidate = &array->bml_btls[idx];
        if(candidate->btl == btl)
            return candidate;
    }
    return NULL;
}
/**
* Structure associated w/ ompi_proc_t that contains the set
* of BTLs used to reach a destination
* NOTE TO GALEN -- this replaces mca_pml_ob1_proc_t
*/
struct mca_bml_base_endpoint_t {
mca_pml_proc_t super;
ompi_proc_t *btl_proc; /**< back-pointer to ompi_proc_t */
opal_mutex_t btl_lock; /**< lock to protect against concurrent access */
int btl_flags; /**< prefered method of accessing this peer */
size_t btl_rdma_offset; /**< max of min rdma size for available rmda btls */
mca_bml_base_btl_array_t btl_eager; /**< array of btls to use for first fragments */
mca_bml_base_btl_array_t btl_send; /**< array of btls to use for remaining fragments */
mca_bml_base_btl_array_t btl_rdma; /**< array of btls that support (prefer) rdma */
};
typedef struct mca_bml_base_endpoint_t mca_bml_base_endpoint_t;
OMPI_COMP_EXPORT OBJ_CLASS_DECLARATION(mca_bml_base_endpoint_t);
/* --- Thin inline wrappers over the per-BTL function table cached in
 * mca_bml_base_btl_t.  Each wrapper records the originating bml_btl in
 * the descriptor's des_context so completion paths can find their way
 * back to the BML entry. --- */

/* Allocate a descriptor of `size` bytes from the BTL; on success the
 * descriptor is tagged with this bml_btl (on failure *des is NULL). */
static inline void mca_bml_base_alloc(mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t** des, size_t size) {
    *des = bml_btl->btl_alloc(bml_btl->btl, size);
    if((*des) != NULL) {
        (*des)->des_context = (void*) bml_btl;
    }
}

/* Return a descriptor to the BTL that allocated it.
 * NOTE(review): des_context is written AFTER the descriptor is handed
 * back to btl_free -- confirm the BTL cannot recycle it concurrently. */
static inline void mca_bml_base_free(mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t* des) {
    bml_btl->btl_free( bml_btl->btl, des );
    des->des_context = NULL;
}

/* Send a descriptor to this bml_btl's endpoint under the given tag;
 * returns the BTL's send status. */
static inline int mca_bml_base_send(mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t* des, mca_btl_base_tag_t tag) {
    des->des_context = (void*) bml_btl;
    return bml_btl->btl_send(
        bml_btl->btl,
        bml_btl->btl_endpoint,
        des,
        tag);
}

/* Initiate an RDMA put described by des; returns the BTL's status. */
static inline int mca_bml_base_put(mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t* des) {
    des->des_context = (void*) bml_btl;
    return bml_btl->btl_put(
        bml_btl->btl,
        bml_btl->btl_endpoint,
        des);
}

/* Prepare a source descriptor from the convertor (with `reserve` bytes
 * of header space); *size is in/out per the BTL prepare contract, and
 * *des is NULL on failure. */
static inline void mca_bml_base_prepare_src(mca_bml_base_btl_t* bml_btl,
                                            mca_mpool_base_registration_t* reg,
                                            struct ompi_convertor_t* conv,
                                            size_t reserve,
                                            size_t *size,
                                            mca_btl_base_descriptor_t** des) {
    *des = bml_btl->btl_prepare_src(
        bml_btl->btl,
        bml_btl->btl_endpoint,
        reg,
        conv,
        reserve,
        size
        );
    if((*des) != NULL) {
        (*des)->des_context = (void*) bml_btl;
    }
}

/* Prepare a destination descriptor (e.g. an RDMA target) from the
 * convertor; same contract as mca_bml_base_prepare_src. */
static inline void mca_bml_base_prepare_dst(mca_bml_base_btl_t* bml_btl,
                                            mca_mpool_base_registration_t* reg,
                                            struct ompi_convertor_t* conv,
                                            size_t reserve,
                                            size_t *size,
                                            mca_btl_base_descriptor_t** des) {
    *des = bml_btl->btl_prepare_dst(
        bml_btl->btl,
        bml_btl->btl_endpoint,
        reg,
        conv,
        reserve,
        size
        );
    if((*des) != NULL) {
        (*des)->des_context = (void*) bml_btl;
    }
}
/*
 * Allocate a send descriptor, preferring the one-deep descriptor cache
 * hung off the bml_btl over a fresh btl_alloc().  The first segment
 * length and des_context are initialized before use.
 *
 * Fix: the threaded variant previously read "bm_btl->btl_cache" (typo),
 * which cannot compile when OMPI_HAVE_THREAD_SUPPORT is enabled.
 *
 * NOTE(review): the cmpset branch below keeps the original (ob1-derived)
 * polarity; if opal_atomic_cmpset_ptr returns non-zero on success, the
 * success path re-allocates instead of using the cached descriptor --
 * verify against the atomic's contract.  Also, the threaded variant
 * sizes the request with `size` while the non-threaded variant uses
 * sizeof(mca_pml_ob1_hdr_t) -- confirm this asymmetry is intended.
 */
#if OMPI_HAVE_THREAD_SUPPORT
#define MCA_BML_BASE_BTL_DES_ALLOC(bml_btl, des, size)                        \
do {                                                                          \
    if(NULL != (des = bml_btl->btl_cache)) {                                  \
        /* atomically acquire the cached descriptor */                        \
        if(opal_atomic_cmpset_ptr(&bml_btl->btl_cache, des, NULL) == 0) {     \
            bml_btl->btl_cache = NULL;                                        \
        } else {                                                              \
            des = bml_btl->btl_alloc(bml_btl->btl, size +                     \
                MCA_BTL_DES_MAX_SEGMENTS * sizeof(mca_btl_base_segment_t));   \
        }                                                                     \
    } else {                                                                  \
        des = bml_btl->btl_alloc(bml_btl->btl, size +                         \
            MCA_BTL_DES_MAX_SEGMENTS * sizeof(mca_btl_base_segment_t));       \
    }                                                                         \
    des->des_src->seg_len = size;                                             \
    des->des_context = (void*) bml_btl;                                       \
} while(0)
#else
#define MCA_BML_BASE_BTL_DES_ALLOC(bml_btl, descriptor, size)                 \
do {                                                                          \
    if(NULL != (descriptor = bml_btl->btl_cache)) {                           \
        bml_btl->btl_cache = NULL;                                            \
    } else {                                                                  \
        descriptor = bml_btl->btl_alloc(bml_btl->btl,                         \
            sizeof(mca_pml_ob1_hdr_t) +                                       \
            MCA_BTL_DES_MAX_SEGMENTS * sizeof(mca_btl_base_segment_t));       \
    }                                                                         \
    descriptor->des_src->seg_len = size;                                      \
    descriptor->des_context = (void*) bml_btl;                                \
} while(0)
#endif
/**
* Return a descriptor
*/
#if OMPI_HAVE_THREAD_SUPPORT
#define MCA_BML_BASE_BTL_DES_RETURN( bml_btl, descriptor ) \
do { \
if(NULL == bml_btl->btl_cache) { \
if(opal_atomic_cmpset_ptr(&bml_btl->btl_cache,NULL,descriptor) == 0) { \
bml_btl->btl_free(bml_btl->btl,descriptor); \
} \
} else { \
bml_btl->btl_free(bml_btl->btl,descriptor); \
} \
} while(0)
#else
#define MCA_BML_BASE_BTL_DES_RETURN(bml_btl, descriptor) \
do { \
if(NULL == bml_btl->btl_cache) { \
bml_btl->btl_cache = descriptor; \
} else { \
bml_btl->btl_free(bml_btl->btl,descriptor); \
} \
} while(0)
#endif
/*
* BML component interface functions and datatype.
*/
/**
* MCA->BML Initializes the BML component and creates specific BML
* module(s).
*
* @param num_bmls (OUT) Returns the number of bml modules created, or 0
* if the transport is not available.
*
* @param enable_progress_threads (IN) Whether this component is
* allowed to run a hidden/progress thread or not.
*
* @param enable_mpi_threads (IN) Whether support for multiple MPI
* threads is enabled or not (i.e., MPI_THREAD_MULTIPLE), which
* indicates whether multiple threads may invoke this component
* simultaneously or not.
*
* @return Array of pointers to BML modules, or NULL if the transport
* is not available.
*
* During component initialization, the BML component should discover
* the physical devices that are available for the given transport,
* and create a BML module to represent each device. Any addressing
* information required by peers to reach the device should be published
* during this function via the mca_base_modex_send() interface.
*
*/
typedef struct mca_bml_base_module_t* (*mca_bml_base_component_init_fn_t)(
int* priority,
bool enable_progress_threads,
bool enable_mpi_threads
);
/**
* MCA->BML Called to progress outstanding requests for
* non-threaded polling environments.
*
* @param tstamp Current time.
* @return OMPI_SUCCESS or error code on failure.
*/
typedef int (*mca_bml_base_module_progress_fn_t)(void);
/**
* BML component descriptor. Contains component version information
* and component open/close/init functions.
*/
struct mca_bml_base_component_1_0_0_t {
mca_base_component_t bml_version;
mca_base_component_data_1_0_0_t bml_data;
mca_bml_base_component_init_fn_t bml_init;
};
typedef struct mca_bml_base_component_1_0_0_t mca_bml_base_component_1_0_0_t;
typedef struct mca_bml_base_component_1_0_0_t mca_bml_base_component_t;
/*
* BML module interface functions and datatype.
*/
/**
* MCA->BML Clean up any resources held by BML module
* before the module is unloaded.
*
* @param bml (IN) BML module.
*
* Prior to unloading a BML module, the MCA framework will call
* the BML finalize method of the module. Any resources held by
* the BML should be released and if required the memory corresponding
* to the BML module freed.
*
*/
typedef int (*mca_bml_base_module_finalize_fn_t)( void );
/**
* PML->BML notification of change in the process list.
*
* @param bml (IN) BML module
* @param nprocs (IN) Number of processes
* @param procs (IN) Set of processes
* @param endpoint (OUT) Set of (optional) mca_bml_base_endpoint_t structures by BML.
* @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this BML.
* @return OMPI_SUCCESS or error status on failure.
*
* The mca_bml_base_module_add_procs_fn_t() is called by the PML to
* determine the set of BMLs that should be used to reach each process.
* Any addressing information exported by the peer via the mca_base_modex_send()
* function should be available during this call via the corresponding
* mca_base_modex_recv() function. The BML may utilize this information to
* determine reachability of each peer process.
*
* For each process that is reachable by the BML, the bit corresponding to the index
* into the proc array (nprocs) should be set in the reachable bitmask. The PML
* provides the BML the option to return a pointer to a data structure defined
* by the BML that is returned to the BML on subsequent calls to the BML data
* transfer functions (e.g bml_send). This may be used by the BML to cache any addressing
* or connection information (e.g. TCP socket, IP queue pair).
*/
typedef int (*mca_bml_base_module_add_procs_fn_t)(
size_t nprocs,
struct ompi_proc_t** procs,
struct mca_bml_base_endpoint_t** endpoints,
struct ompi_bitmap_t* reachable
);
/**
* Notification of change to the process list.
*
* @param bml (IN) BML module
* @param nprocs (IN) Number of processes
* @param proc (IN) Set of processes
* @param peer (IN) Set of peer addressing information.
* @return Status indicating if cleanup was successful
*
* When the process list changes, the PML notifies the BML of the
* change, to provide the opportunity to cleanup or release any
* resources associated with the peer.
*/
typedef int (*mca_bml_base_module_del_procs_fn_t)(
size_t nprocs,
struct ompi_proc_t** procs
);
/**
* Callback function that is called asynchronously on receipt
* of data by the transport layer.
*/
typedef void (*mca_bml_base_module_recv_cb_fn_t)(
mca_btl_base_module_t* bml_btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata
);
/**
* Register a callback function that is called on receipt
* of a fragment.
*
* @param bml (IN) BML module
* @return Status indicating if cleanup was successful
*
* When the process list changes, the PML notifies the BML of the
* change, to provide the opportunity to cleanup or release any
* resources associated with the peer.
*/
typedef int (*mca_bml_base_module_register_fn_t)(
mca_btl_base_tag_t tag,
mca_bml_base_module_recv_cb_fn_t cbfunc,
void* cbdata
);
/**
* BML module interface functions and attributes.
*/
struct mca_bml_base_module_t {
/* BML common attributes */
mca_bml_base_component_t* bml_component; /**< pointer back to the BML component structure */
size_t bml_eager_limit; /**< maximum size of first fragment -- eager send */
size_t bml_min_send_size; /**< threshold below which the BML should not fragment */
size_t bml_max_send_size; /**< maximum send fragment size supported by the BML */
size_t bml_min_rdma_size; /**< threshold below which the BML should not fragment */
size_t bml_max_rdma_size; /**< maximum rdma fragment size supported by the BML */
/* BML function table */
mca_bml_base_module_add_procs_fn_t bml_add_procs;
mca_bml_base_module_del_procs_fn_t bml_del_procs;
mca_bml_base_module_register_fn_t bml_register;
mca_bml_base_module_finalize_fn_t bml_finalize;
mca_bml_base_module_progress_fn_t bml_progress;
};
typedef struct mca_bml_base_module_t mca_bml_base_module_t;
/*
* Macro for use in modules that are of type bml v1.0.0
*/
#define MCA_BML_BASE_VERSION_1_0_0 \
/* v1.0 is chained to MCA v1.0 */ \
MCA_BASE_VERSION_1_0_0, \
/* bml v1.0 */ \
"bml", 1, 0, 0
#endif /* OMPI_MCA_BML_H */

44
ompi/mca/bml/r2/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,44 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
include $(top_ompi_srcdir)/config/Makefile.options
r2_sources = \
bml_r2.c \
bml_r2.h \
bml_r2_btl.c \
bml_r2_btl.h \
bml_r2_component.c
if OMPI_BUILD_bml_r2_DSO
component_noinst =
component_install = mca_bml_r2.la
else
component_noinst = libmca_bml_r2.la
component_install =
endif
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_bml_r2_la_SOURCES = $(r2_sources)
mca_bml_r2_la_LIBADD =
mca_bml_r2_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_bml_r2_la_SOURCES = $(r2_sources)
libmca_bml_r2_la_LIBADD =
libmca_bml_r2_la_LDFLAGS = -module -avoid-version

438
ompi/mca/bml/r2/bml_r2.c Обычный файл
Просмотреть файл

@ -0,0 +1,438 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdlib.h>
#include <string.h>
#include "class/ompi_bitmap.h"
#include "mca/bml/bml.h"
#include "mca/btl/btl.h"
#include "mca/btl/base/base.h"
#include "mca/bml/base/bml_base_endpoint.h"
#include "mca/bml/base/bml_base_btl.h"
#include "bml_r2.h"
#include "class/orte_proc_table.h"
extern mca_bml_base_component_t mca_bml_r2_component;
/*
 * Global r2 BML module instance.  The size limits in the base module are
 * initialized to zero here; the function table wires the r2
 * implementations into the generic BML interface.
 */
mca_bml_r2_module_t mca_bml_r2 = {
    {
        &mca_bml_r2_component,
        0, /* eager limit */
        0, /* min send size */
        0, /* max send size */
        0, /* min rdma size */
        0, /* max rdma size */
        mca_bml_r2_add_procs,
        mca_bml_r2_del_procs,
        mca_bml_r2_register,
        mca_bml_r2_finalize,
        mca_bml_r2_progress
    }
};
/*
 * qsort() comparator: orders BTL modules by descending btl_exclusivity so
 * the most exclusive transports appear first in the module array.
 */
static int btl_exclusivity_compare(const void* arg1, const void* arg2)
{
    mca_btl_base_module_t* lhs = *(struct mca_btl_base_module_t**)arg1;
    mca_btl_base_module_t* rhs = *(struct mca_btl_base_module_t**)arg2;

    if (lhs->btl_exclusivity == rhs->btl_exclusivity) {
        return 0;
    }
    return (lhs->btl_exclusivity > rhs->btl_exclusivity) ? -1 : 1;
}
/*
 * Generic receive callback registered with the BTLs: looks up the handler
 * stored for this tag in mca_bml_r2.r2_reg and forwards the fragment to it
 * unchanged.
 */
void mca_bml_r2_recv_callback(
    mca_btl_base_module_t* btl,
    mca_btl_base_tag_t tag,
    mca_btl_base_descriptor_t* desc,
    void* cbdata
){
    /* dispatch straight to the upper-layer handler for this tag */
    mca_bml_r2_recv_reg_t handler = mca_bml_r2.r2_reg[tag];
    handler(btl, tag, desc, cbdata);
}
/*
 * Drive each distinct BTL progress function once and report the total
 * number of completions they claim.
 */
int mca_bml_r2_progress( void ) {
    int completed = 0;
    size_t i = 0;

    while (i < mca_bml_r2.num_btl_progress) {
        int rc = mca_bml_r2.btl_progress[i]();
        if (rc > 0) {
            completed += rc;
        }
        i++;
    }
    return completed;
}
/*
 * Snapshot the set of BTL modules selected by the BTL framework into
 * mca_bml_r2.btl_modules, sorted by descending exclusivity, and size the
 * (initially empty) progress-function array to match.
 *
 * @return OMPI_SUCCESS or OMPI_ERR_OUT_OF_RESOURCE.
 */
static int mca_bml_r2_add_btls( void )
{
    opal_list_t* btls = &mca_btl_base_modules_initialized;
    mca_btl_base_selected_module_t* selected_btl;
    size_t num_btls = opal_list_get_size(btls);

    mca_bml_r2.num_btl_modules = 0;
    mca_bml_r2.num_btl_progress = 0;
    mca_bml_r2.btl_modules = (mca_btl_base_module_t **)
        malloc(sizeof(mca_btl_base_module_t*) * num_btls);
    mca_bml_r2.btl_progress = (mca_btl_base_component_progress_fn_t*)
        malloc(sizeof(mca_btl_base_component_progress_fn_t) * num_btls);
    if (NULL == mca_bml_r2.btl_modules ||
        NULL == mca_bml_r2.btl_progress) {
        /* bug fix: release whichever allocation succeeded so a partial
         * failure does not leak (free(NULL) is a no-op) */
        free(mca_bml_r2.btl_modules);
        free(mca_bml_r2.btl_progress);
        mca_bml_r2.btl_modules = NULL;
        mca_bml_r2.btl_progress = NULL;
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    for(selected_btl = (mca_btl_base_selected_module_t*)opal_list_get_first(btls);
        selected_btl != (mca_btl_base_selected_module_t*)opal_list_get_end(btls);
        selected_btl = (mca_btl_base_selected_module_t*)opal_list_get_next(selected_btl)) {
        mca_bml_r2.btl_modules[mca_bml_r2.num_btl_modules++] = selected_btl->btl_module;
    }

    /* sort btl list by exclusivity (highest first) */
    qsort(mca_bml_r2.btl_modules,
          mca_bml_r2.num_btl_modules,
          sizeof(struct mca_btl_base_module_t*),
          btl_exclusivity_compare);
    return OMPI_SUCCESS;
}
/*
* For each proc setup a datastructure that indicates the PTLs
* that can be used to reach the destination.
*
*/
int mca_bml_r2_add_procs(
size_t nprocs,
struct ompi_proc_t** procs,
struct mca_bml_base_endpoint_t** bml_endpoints,
struct ompi_bitmap_t* reachable
)
{
size_t p;
int rc;
size_t p_index;
struct mca_bml_base_btl_t** bml_btls = NULL;
struct mca_btl_base_endpoint_t ** btl_endpoints = NULL;
if(nprocs == 0)
return OMPI_SUCCESS;
if(OMPI_SUCCESS != (rc = mca_bml_r2_add_btls()) )
return rc;
/* attempt to add all procs to each r2 */
btl_endpoints = (struct mca_btl_base_endpoint_t **) malloc(nprocs * sizeof(struct mca_btl_base_endpoint_t*));
bml_endpoints = (struct mca_bml_base_endpoint_t **)malloc(nprocs * sizeof(struct mca_bml_base_endpoint_t*));
bml_btls = (struct mca_bml_base_btl_t **) malloc(nprocs * sizeof(struct mca_bml_base_btl_t*));
for(p_index = 0; p_index < mca_bml_r2.num_btl_modules; p_index++) {
mca_btl_base_module_t* btl = mca_bml_r2.btl_modules[p_index];
int btl_inuse = 0;
/* if the r2 can reach the destination proc it sets the
* corresponding bit (proc index) in the reachable bitmap
* and can return addressing information for each proc
* that is passed back to the r2 on data transfer calls
*/
ompi_bitmap_clear_all_bits(reachable);
memset(bml_endpoints, 0, nprocs * sizeof(struct mca_bml_base_endpoint_t*));
memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*));
memset(bml_btls, 0, nprocs * sizeof(struct mca_bml_base_btl_t*));
rc = btl->btl_add_procs(btl, nprocs, procs, btl_endpoints, reachable);
if(OMPI_SUCCESS != rc) {
free(btl_endpoints);
return rc;
}
/* for each proc that is reachable - add the endpoint to the bml_endpoints array(s) */
for(p=0; p<nprocs; p++) {
if(ompi_bitmap_is_set_bit(reachable, p)) {
ompi_proc_t *proc = procs[p];
/* mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml; */
mca_bml_base_endpoint_t * bml_endpoint;
mca_bml_base_btl_t* bml_btl;
size_t size;
/* proc = (ompi_proc_t*) orte_hash_table_get_proc( */
/* &mca_bml_r2.procs, */
/* &proc->proc_name */
/* ); */
if(NULL != proc && NULL != proc->proc_pml) {
bml_endpoints[p] =(mca_bml_base_endpoint_t*) proc->proc_pml;
continue;
}
/* this btl can be used */
btl_inuse++;
/* allocate bml specific proc data */
bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t);
if (NULL == bml_endpoint) {
opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources");
free(btl_endpoints);
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* preallocate space in array for max number of r2s */
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_eager, mca_bml_r2.num_btl_modules);
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send, mca_bml_r2.num_btl_modules);
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules);
bml_endpoint->btl_proc = proc;
/*proc->proc_pml = (struct mca_pml_proc_t*) bml_endpoint;*/
/* dont allow an additional PTL with a lower exclusivity ranking */
size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
if(size > 0) {
bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, size-1);
/* skip this btl if the exclusivity is less than the previous */
if(bml_btl->btl->btl_exclusivity > btl->btl_exclusivity) {
if(btl_endpoints[p] != NULL) {
btl->btl_del_procs(btl, 1, &proc, &btl_endpoints[p]);
}
continue;
}
}
/* cache the endpoint on the proc */
bml_btl = mca_bml_base_btl_array_insert(&bml_endpoint->btl_send);
bml_btl->btl = btl;
bml_btl->btl_eager_limit = btl->btl_eager_limit;
bml_btl->btl_min_send_size = btl->btl_min_send_size;
bml_btl->btl_max_send_size = btl->btl_max_send_size;
bml_btl->btl_min_rdma_size = btl->btl_min_rdma_size;
bml_btl->btl_max_rdma_size = btl->btl_max_rdma_size;
bml_btl->btl_cache = NULL;
bml_btl->btl_endpoint = btl_endpoints[p];
bml_btl->btl_weight = 0;
bml_btl->btl_alloc = btl->btl_alloc;
bml_btl->btl_free = btl->btl_free;
bml_btl->btl_prepare_src = btl->btl_prepare_src;
bml_btl->btl_prepare_dst = btl->btl_prepare_dst;
bml_btl->btl_send = btl->btl_send;
bml_btl->btl_put = btl->btl_put;
bml_btl->btl_get = btl->btl_get;
bml_btl->btl_progress = btl->btl_component->btl_progress;
bml_endpoints[p]=bml_endpoint;
proc->proc_pml = (mca_pml_proc_t*) bml_endpoint;
/* orte_hash_table_set_proc( */
/* &mca_bml_r2.procs, */
/* &proc->proc_name, */
/* proc); */
}
}
if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) {
size_t p;
bool found = false;
for(p=0; p<mca_bml_r2.num_btl_progress; p++) {
if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) {
found = true;
break;
}
}
if(found == false) {
mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress] =
btl->btl_component->btl_progress;
mca_bml_r2.num_btl_progress++;
}
}
}
free(btl_endpoints);
/* iterate back through procs and compute metrics for registered r2s */
for(p=0; p<nprocs; p++) {
ompi_proc_t *proc = procs[p];
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
double total_bandwidth = 0;
uint32_t latency = 0;
size_t n_index;
size_t n_size;
/* skip over procs w/ no ptls registered */
if(NULL == bml_endpoint)
continue;
/* (1) determine the total bandwidth available across all r2s
* note that we need to do this here, as we may already have r2s configured
* (2) determine the highest priority ranking for latency
*/
n_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
for(n_index = 0; n_index < n_size; n_index++) {
mca_bml_base_btl_t* bml_btl =
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
mca_btl_base_module_t* btl = bml_btl->btl;
total_bandwidth += bml_btl->btl->btl_bandwidth;
if(btl->btl_latency > latency)
latency = btl->btl_latency;
}
/* (1) set the weight of each btl as a percentage of overall bandwidth
* (2) copy all btl instances at the highest priority ranking into the
* list of btls used for first fragments
*/
for(n_index = 0; n_index < n_size; n_index++) {
mca_bml_base_btl_t* bml_btl =
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
mca_btl_base_module_t *btl = bml_btl->btl;
double weight;
/* compute weighting factor for this r2 */
if(btl->btl_bandwidth)
weight = btl->btl_bandwidth / total_bandwidth;
else
weight = 1.0 / n_size;
bml_btl->btl_weight = (int)(weight * 100);
/* check to see if this r2 is already in the array of r2s
* used for first fragments - if not add it.
*/
if(btl->btl_latency == latency) {
mca_bml_base_btl_t* bml_btl_new =
mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager);
*bml_btl_new = *bml_btl;
}
/* check flags - is rdma prefered */
if(btl->btl_flags & MCA_BTL_FLAGS_RDMA &&
proc->proc_arch == ompi_proc_local_proc->proc_arch) {
mca_bml_base_btl_t* bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
*bml_btl_rdma = *bml_btl;
if(bml_endpoint->btl_rdma_offset < bml_btl_rdma->btl_min_rdma_size) {
bml_endpoint->btl_rdma_offset = bml_btl_rdma->btl_min_rdma_size;
}
}
}
}
return OMPI_SUCCESS;
}
/*
* iterate through each proc and notify any PTLs associated
* with the proc that it is/has gone away
*/
int mca_bml_r2_del_procs(size_t nprocs,
struct ompi_proc_t** procs)
{
size_t p;
int rc;
for(p = 0; p < nprocs; p++) {
ompi_proc_t *proc = procs[p];
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
size_t f_index, f_size;
size_t n_index, n_size;
/* notify each ptl that the proc is going away */
f_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_eager);
for(f_index = 0; f_index < f_size; f_index++) {
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_eager, f_index);
mca_btl_base_module_t* btl = bml_btl->btl;
rc = btl->btl_del_procs(btl,1,&proc,&bml_btl->btl_endpoint);
if(OMPI_SUCCESS != rc) {
return rc;
}
/* remove this from next array so that we dont call it twice w/
* the same address pointer
*/
n_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_eager);
for(n_index = 0; n_index < n_size; n_index++) {
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
if(bml_btl->btl == btl) {
memset(bml_btl, 0, sizeof(mca_bml_base_btl_t));
break;
}
}
}
/* notify each r2 that was not in the array of r2s for first fragments */
n_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
for(n_index = 0; n_index < n_size; n_index++) {
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_eager, n_index);
mca_btl_base_module_t* btl = bml_btl->btl;
if (btl != 0) {
rc = btl->btl_del_procs(btl,1,&proc,&bml_btl->btl_endpoint);
if(OMPI_SUCCESS != rc)
return rc;
}
}
/* do any required cleanup */
OBJ_RELEASE(bml_endpoint);
}
return OMPI_SUCCESS;
}
/*
 * Module finalize hook.  Currently a stub: the btl_modules/btl_progress
 * arrays and per-proc endpoints created by this module are not yet
 * released here (see the TODO).
 */
int mca_bml_r2_finalize( void ) {
    return OMPI_SUCCESS; /* TODO */
}
/*
 * Register a receive callback for the given tag with every known BTL
 * module.
 *
 * @param tag     message tag (index into the per-tag handler table)
 * @param cbfunc  callback invoked on receipt of a fragment with this tag
 * @param data    opaque context passed through to the BTL registration
 * @return OMPI_SUCCESS or the first BTL error encountered.
 */
int mca_bml_r2_register(
    mca_btl_base_tag_t tag,
    mca_bml_base_module_recv_cb_fn_t cbfunc,
    void* data
    )
{
    uint32_t i;
    int rc;
    mca_btl_base_module_t *btl;

    /* bug fix: record the handler so mca_bml_r2_recv_callback() can
     * dispatch on r2_reg[tag]; previously the table was never populated
     * although the callback reads it */
    mca_bml_r2.r2_reg[tag] = cbfunc;

    for(i = 0; i < mca_bml_r2.num_btl_modules; i++) {
        btl = mca_bml_r2.btl_modules[i];
        rc = btl->btl_register(btl, tag, cbfunc, data);
        if(OMPI_SUCCESS != rc)
            return rc;
    }
    return OMPI_SUCCESS;
}
/*
 * Component finalize hook.  Stub: no component-level resources are torn
 * down yet (module cleanup is also still pending in mca_bml_r2_finalize).
 */
int mca_bml_r2_component_fini(void)
{
    /* FIX */
    return OMPI_SUCCESS;
}

95
ompi/mca/bml/r2/bml_r2.h Обычный файл
Просмотреть файл

@ -0,0 +1,95 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* BML Management Layer (BML)
*
*/
#include "mca/mca.h"
#include "mca/btl/btl.h"
#ifndef MCA_BML_R2_H
#define MCA_BML_R2_H
#include "include/types.h"
#include "class/ompi_free_list.h"
#include "mca/bml/bml.h"
typedef mca_bml_base_module_recv_cb_fn_t mca_bml_r2_recv_reg_t;
void mca_bml_r2_recv_callback(
mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t *des,
void* cbdata);
/**
 * BML module interface functions and attributes.
 *
 * r2 module state: extends mca_bml_base_module_t with the set of selected
 * BTL modules (sorted by exclusivity in mca_bml_r2_add_btls) and the
 * distinct progress functions driven by mca_bml_r2_progress().
 */
struct mca_bml_r2_module_t {
    mca_bml_base_module_t super;                          /* generic BML interface (must be first) */
    size_t num_btl_modules;                               /* valid entries in btl_modules */
    mca_btl_base_module_t** btl_modules;                  /* selected BTL modules */
    size_t num_btl_progress;                              /* valid entries in btl_progress */
    mca_btl_base_component_progress_fn_t * btl_progress;  /* de-duplicated progress functions */
    mca_bml_r2_recv_reg_t r2_reg[256];                    /* per-tag receive callbacks */
    opal_hash_table_t procs;                              /* proc table, constructed in component_init */
};
typedef struct mca_bml_r2_module_t mca_bml_r2_module_t;
extern mca_bml_r2_module_t mca_bml_r2;
extern int mca_bml_r2_component_open(void);
extern int mca_bml_r2_component_close(void);
extern mca_bml_base_module_t* mca_bml_r2_component_init(
int* priority,
bool enable_progress_threads,
bool enable_mpi_threads
);
extern int mca_bml_r2_progress(void);
int mca_bml_r2_add_procs(
size_t nprocs,
struct ompi_proc_t** procs,
struct mca_bml_base_endpoint_t** bml_endpoints,
struct ompi_bitmap_t* reachable
);
int mca_bml_r2_del_procs(
size_t nprocs,
struct ompi_proc_t** procs
);
int mca_bml_r2_register(
mca_btl_base_tag_t tag,
mca_bml_base_module_recv_cb_fn_t cbfunc,
void* data
);
int mca_bml_r2_finalize( void );
#endif /* OMPI_MCA_BML_R2_H */

23
ompi/mca/bml/r2/bml_r2_btl.c Обычный файл
Просмотреть файл

@ -0,0 +1,23 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "mca/bml/bml.h"
#include "bml_r2_btl.h"

92
ompi/mca/bml/r2/bml_r2_btl.h Обычный файл
Просмотреть файл

@ -0,0 +1,92 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_BML_R2_BTL_H
#define MCA_BML_R2_BTL_H
#include "opal/util/output.h"
#include "mca/btl/btl.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * Allocate a descriptor for control message
 *
 * "btl" here is a mca_bml_base_btl_t (it carries btl_cache/btl_alloc), not
 * a raw BTL module.  A single descriptor may be cached per bml_btl; on a
 * cache miss a descriptor large enough for a bml_r2 header plus the
 * maximum number of segments is allocated through the owning BTL.
 *
 * NOTE(review): in the threaded variant, when opal_atomic_cmpset_ptr()
 * returns 0 the cached descriptor is still used and btl_cache is cleared
 * non-atomically -- confirm the return-value convention of
 * opal_atomic_cmpset_ptr() here (on a lost race another thread may own
 * that descriptor).
 */
#if OMPI_HAVE_THREAD_SUPPORT
#define MCA_BML_R2_BTL_DES_ALLOC(btl, descriptor, size) \
do { \
    if(NULL != (descriptor = btl->btl_cache)) { \
        /* atomically acquire the cached descriptor */ \
        if(opal_atomic_cmpset_ptr(&btl->btl_cache, descriptor, NULL) == 0) { \
            btl->btl_cache = NULL; \
        } else { \
            descriptor = btl->btl_alloc(btl->btl, sizeof(mca_bml_r2_hdr_t) + \
                MCA_BTL_DES_MAX_SEGMENTS * sizeof(mca_btl_base_segment_t)); \
        } \
    } else { \
        descriptor = btl->btl_alloc(btl->btl, sizeof(mca_bml_r2_hdr_t) + \
            MCA_BTL_DES_MAX_SEGMENTS * sizeof(mca_btl_base_segment_t)); \
    } \
    descriptor->des_src->seg_len = size; \
} while(0)
#else
#define MCA_BML_R2_BTL_DES_ALLOC(btl, descriptor, size) \
do { \
    if(NULL != (descriptor = btl->btl_cache)) { \
        btl->btl_cache = NULL; \
    } else { \
        descriptor = btl->btl_alloc(btl->btl, sizeof(mca_bml_r2_hdr_t) + \
            MCA_BTL_DES_MAX_SEGMENTS * sizeof(mca_btl_base_segment_t)); \
    } \
    descriptor->des_src->seg_len = size; \
} while(0)
#endif
/**
 * Return a descriptor
 *
 * Stashes the descriptor back into the per-bml_btl single-entry cache when
 * the cache is empty, otherwise frees it through the owning BTL.
 *
 * Bug fixes: the threaded variant was missing a line-continuation
 * backslash before "} while(0)", so the macro did not preprocess as a
 * single statement; the non-threaded variant freed via "endpoint->btl"
 * although "endpoint" is not a macro parameter -- it must be "btl->btl".
 */
#if OMPI_HAVE_THREAD_SUPPORT
#define MCA_BML_R2_BTL_DES_RETURN(btl, descriptor) \
do { \
    if(NULL == btl->btl_cache) { \
        /* race lost: another thread cached a descriptor first */ \
        if(opal_atomic_cmpset_ptr(&btl->btl_cache,NULL,descriptor) == 0) { \
            btl->btl_free(btl->btl,descriptor); \
        } \
    } else { \
        btl->btl_free(btl->btl,descriptor); \
    } \
} while(0)
#else
#define MCA_BML_R2_BTL_DES_RETURN(btl, descriptor) \
do { \
    if(NULL == btl->btl_cache) { \
        btl->btl_cache = descriptor; \
    } else { \
        btl->btl_free(btl->btl,descriptor); \
    } \
} while(0)
#endif
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

97
ompi/mca/bml/r2/bml_r2_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,97 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "include/sys/cache.h"
#include "opal/event/event.h"
#include "mpi.h"
#include "mca/bml/bml.h"
#include "mca/btl/btl.h"
#include "mca/btl/base/base.h"
#include "mca/base/mca_base_param.h"
#include "bml_r2.h"
/*
 * r2 BML component descriptor: version/meta data plus the init entry point
 * invoked by the BML framework during component selection.
 */
mca_bml_base_component_1_0_0_t mca_bml_r2_component = {

    /* First, the mca_base_component_t struct containing meta
       information about the component itself */

    {
        /* Indicate that we are a bml v1.0.0 component (which also implies
           a specific MCA version) */
        MCA_BML_BASE_VERSION_1_0_0,
        "r2", /* MCA component name */
        OMPI_MAJOR_VERSION,  /* MCA component major version */
        OMPI_MINOR_VERSION,  /* MCA component minor version */
        OMPI_RELEASE_VERSION,  /* MCA component release version */
        mca_bml_r2_component_open,  /* component open */
        mca_bml_r2_component_close  /* component close */
    },

    /* Next the MCA v1.0.0 component meta data */

    {
        /* Whether the component is checkpointable or not */
        false
    },

    /* init: selects BTLs and returns the module function table */
    mca_bml_r2_component_init
};
/*
 * Register (if necessary) an integer MCA parameter in the "bml"/"r2"
 * namespace and return its current value, falling back to default_value
 * when it has not been set.
 */
static inline int mca_bml_r2_param_register_int(
    const char* param_name,
    int default_value)
{
    int value = default_value;
    int id = mca_base_param_register_int("bml", "r2", param_name, NULL, default_value);

    mca_base_param_lookup_int(id, &value);
    return value;
}
/*
 * Component open hook.  No MCA parameters or resources are set up yet.
 */
int mca_bml_r2_component_open(void)
{
    return OMPI_SUCCESS;
}
/*
 * Component close hook.  No component-level state is torn down here yet.
 * Fix: removed the unused local 'rc' (unused-variable warning).
 */
int mca_bml_r2_component_close(void)
{
    /* OBJ_DESTRUCT(&mca_bml_r2.lock); */
    return OMPI_SUCCESS;
}
/*
 * Component init: drive BTL selection and, on success, hand back the r2
 * module function table with priority 100.  Returns NULL (component
 * disqualified) if no BTL can be selected.
 */
mca_bml_base_module_t* mca_bml_r2_component_init(
    int* priority,
    bool enable_progress_threads,
    bool enable_mpi_threads
    )
{
    /* initialize the BTL framework first; bail out if selection fails */
    int selected = mca_btl_base_select(enable_progress_threads, enable_mpi_threads);
    if (OMPI_SUCCESS != selected) {
        return NULL;
    }

    OBJ_CONSTRUCT(&mca_bml_r2.procs, opal_hash_table_t);
    *priority = 100;
    return &mca_bml_r2.super;
}

Просмотреть файл

@ -177,6 +177,7 @@ struct mca_btl_base_descriptor_t {
size_t des_dst_cnt;
mca_btl_base_completion_fn_t des_cbfunc;
void* des_cbdata;
void* des_context;
int32_t des_flags;
};
typedef struct mca_btl_base_descriptor_t mca_btl_base_descriptor_t;

Просмотреть файл

@ -32,6 +32,7 @@
#include "pml_ob1_sendreq.h"
#include "pml_ob1_recvreq.h"
#include "pml_ob1_rdmafrag.h"
#include "mca/bml/base/base.h"
mca_pml_ob1_t mca_pml_ob1 = {
{
@ -49,20 +50,21 @@ mca_pml_ob1_t mca_pml_ob1 = {
mca_pml_ob1_send,
mca_pml_ob1_iprobe,
mca_pml_ob1_probe,
mca_pml_ob1_start,
mca_pml_ob1_start,
32768,
(0x7fffffff)
}
};
static int mca_pml_ob1_add_btls( void );
int mca_pml_ob1_enable(bool enable)
{
int rc;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint32_t proc_arch;
#endif
if( false == enable ) return OMPI_SUCCESS;
OBJ_CONSTRUCT(&mca_pml_ob1.lock, opal_mutex_t);
/* requests */
@ -108,15 +110,12 @@ int mca_pml_ob1_enable(bool enable)
NULL);
OBJ_CONSTRUCT(&mca_pml_ob1.buffers, ompi_free_list_t);
/* pending operations */
OBJ_CONSTRUCT(&mca_pml_ob1.send_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_ob1.recv_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_ob1.acks_pending, opal_list_t);
if(OMPI_SUCCESS != (rc = mca_pml_ob1_add_btls()) )
return rc;
mca_pml_ob1.enabled = true;
return OMPI_SUCCESS;
}
@ -124,6 +123,7 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
{
/* allocate pml specific comm data */
mca_pml_ob1_comm_t* pml_comm = OBJ_NEW(mca_pml_ob1_comm_t);
mca_pml_ob1_proc_t* pml_proc = NULL;
int i;
if (NULL == pml_comm) {
@ -131,14 +131,17 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
}
mca_pml_ob1_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count);
comm->c_pml_comm = pml_comm;
comm->c_pml_procs = (mca_pml_ob1_proc_t**)malloc(
comm->c_remote_group->grp_proc_count * sizeof(mca_pml_ob1_proc_t));
comm->c_pml_procs = (mca_pml_proc_t**)malloc(
comm->c_remote_group->grp_proc_count * sizeof(mca_pml_proc_t));
if(NULL == comm->c_pml_procs) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
for(i=0; i<comm->c_remote_group->grp_proc_count; i++)
comm->c_pml_procs[i] = comm->c_remote_group->grp_proc_pointers[i]->proc_pml;
for(i=0; i<comm->c_remote_group->grp_proc_count; i++){
pml_proc = OBJ_NEW(mca_pml_ob1_proc_t);
pml_proc->base.proc_ompi = comm->c_remote_group->grp_proc_pointers[i];
comm->c_pml_procs[i] = (mca_pml_proc_t*) pml_proc; /* comm->c_remote_group->grp_proc_pointers[i]->proc_pml; */
}
return OMPI_SUCCESS;
}
@ -152,86 +155,6 @@ int mca_pml_ob1_del_comm(ompi_communicator_t* comm)
return OMPI_SUCCESS;
}
static int btl_exclusivity_compare(const void* arg1, const void* arg2)
{
mca_btl_base_module_t* btl1 = *(struct mca_btl_base_module_t**)arg1;
mca_btl_base_module_t* btl2 = *(struct mca_btl_base_module_t**)arg2;
if( btl1->btl_exclusivity > btl2->btl_exclusivity ) {
return -1;
} else if (btl1->btl_exclusivity == btl2->btl_exclusivity ) {
return 0;
} else {
return 1;
}
}
static int mca_pml_ob1_add_btls( void )
{
/* build an array of ob1s and ob1 modules */
opal_list_t* btls = &mca_btl_base_modules_initialized;
mca_btl_base_selected_module_t* selected_btl;
size_t num_btls = opal_list_get_size(btls);
mca_pml_ob1.num_btl_modules = 0;
mca_pml_ob1.num_btl_progress = 0;
mca_pml_ob1.num_btl_components = 0;
mca_pml_ob1.btl_modules = (mca_btl_base_module_t **)malloc(sizeof(mca_btl_base_module_t*) * num_btls);
mca_pml_ob1.btl_progress = (mca_btl_base_component_progress_fn_t*)malloc(sizeof(mca_btl_base_component_progress_fn_t) * num_btls);
mca_pml_ob1.btl_components = (mca_btl_base_component_t **)malloc(sizeof(mca_btl_base_component_t*) * num_btls);
if (NULL == mca_pml_ob1.btl_modules ||
NULL == mca_pml_ob1.btl_progress ||
NULL == mca_pml_ob1.btl_components) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
for(selected_btl = (mca_btl_base_selected_module_t*)opal_list_get_first(btls);
selected_btl != (mca_btl_base_selected_module_t*)opal_list_get_end(btls);
selected_btl = (mca_btl_base_selected_module_t*)opal_list_get_next(selected_btl)) {
mca_btl_base_module_t *btl = selected_btl->btl_module;
size_t i;
int rc;
mca_pml_ob1.btl_modules[mca_pml_ob1.num_btl_modules++] = btl;
for(i=0; i<mca_pml_ob1.num_btl_components; i++) {
if(mca_pml_ob1.btl_components[i] == btl->btl_component) {
break;
}
}
/* override eager limit larger than our max */
if(btl->btl_eager_limit > mca_pml_ob1.eager_limit) {
btl->btl_eager_limit = mca_pml_ob1.eager_limit;
}
/* setup callback for receive */
rc = btl->btl_register(btl, MCA_BTL_TAG_PML, mca_pml_ob1_recv_frag_callback, NULL);
if(OMPI_SUCCESS != rc)
return rc;
if(i == mca_pml_ob1.num_btl_components) {
mca_pml_ob1.btl_components[mca_pml_ob1.num_btl_components++] = btl->btl_component;
}
}
/* initialize free list of receive buffers */
ompi_free_list_init(
&mca_pml_ob1.buffers,
sizeof(mca_pml_ob1_buffer_t) + mca_pml_ob1.eager_limit,
OBJ_CLASS(mca_pml_ob1_buffer_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL);
/* sort ob1 list by exclusivity */
qsort(mca_pml_ob1.btl_modules,
mca_pml_ob1.num_btl_modules,
sizeof(struct mca_btl_base_module_t*),
btl_exclusivity_compare);
return OMPI_SUCCESS;
}
/*
* For each proc setup a datastructure that indicates the PTLs
@ -243,7 +166,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
{
size_t p;
ompi_bitmap_t reachable;
struct mca_btl_base_endpoint_t** btl_endpoints = NULL;
struct mca_bml_base_endpoint_t ** bml_endpoints = NULL;
int rc;
size_t p_index;
@ -255,173 +178,36 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
if(OMPI_SUCCESS != rc)
return rc;
/* attempt to add all procs to each ob1 */
btl_endpoints = (struct mca_btl_base_endpoint_t **)malloc(nprocs * sizeof(struct mca_btl_base_endpoint_t*));
for(p_index = 0; p_index < mca_pml_ob1.num_btl_modules; p_index++) {
mca_btl_base_module_t* btl = mca_pml_ob1.btl_modules[p_index];
int btl_inuse = 0;
/* if the ob1 can reach the destination proc it sets the
* corresponding bit (proc index) in the reachable bitmap
* and can return addressing information for each proc
* that is passed back to the ob1 on data transfer calls
*/
ompi_bitmap_clear_all_bits(&reachable);
memset(btl_endpoints, 0, nprocs * sizeof(struct mca_ob1_base_endpoint_t*));
rc = btl->btl_add_procs(btl, nprocs, procs, btl_endpoints, &reachable);
if(OMPI_SUCCESS != rc) {
free(btl_endpoints);
return rc;
}
/* for each proc that is reachable - add the ob1 to the procs array(s) */
for(p=0; p<nprocs; p++) {
if(ompi_bitmap_is_set_bit(&reachable, p)) {
ompi_proc_t *proc = procs[p];
mca_pml_ob1_proc_t* proc_pml = proc->proc_pml;
mca_pml_ob1_endpoint_t* endpoint;
size_t size;
/* this btl can be used */
btl_inuse++;
/* initialize each proc */
if(NULL == proc_pml) {
/* allocate pml specific proc data */
proc_pml = OBJ_NEW(mca_pml_ob1_proc_t);
if (NULL == proc_pml) {
opal_output(0, "mca_pml_ob1_add_procs: unable to allocate resources");
free(btl_endpoints);
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* preallocate space in array for max number of ob1s */
mca_pml_ob1_ep_array_reserve(&proc_pml->btl_eager, mca_pml_ob1.num_btl_modules);
mca_pml_ob1_ep_array_reserve(&proc_pml->btl_send, mca_pml_ob1.num_btl_modules);
mca_pml_ob1_ep_array_reserve(&proc_pml->btl_rdma, mca_pml_ob1.num_btl_modules);
proc_pml->proc_ompi = proc;
proc->proc_pml = proc_pml;
}
/* dont allow an additional PTL with a lower exclusivity ranking */
size = mca_pml_ob1_ep_array_get_size(&proc_pml->btl_send);
if(size > 0) {
endpoint = mca_pml_ob1_ep_array_get_index(&proc_pml->btl_send, size-1);
/* skip this btl if the exclusivity is less than the previous */
if(endpoint->btl->btl_exclusivity > btl->btl_exclusivity) {
if(btl_endpoints[p] != NULL) {
btl->btl_del_procs(btl, 1, &proc, &btl_endpoints[p]);
}
continue;
}
}
/* cache the endpoint on the proc */
endpoint = mca_pml_ob1_ep_array_insert(&proc_pml->btl_send);
endpoint->btl = btl;
endpoint->btl_eager_limit = btl->btl_eager_limit;
endpoint->btl_min_send_size = btl->btl_min_send_size;
endpoint->btl_max_send_size = btl->btl_max_send_size;
endpoint->btl_min_rdma_size = btl->btl_min_rdma_size;
endpoint->btl_max_rdma_size = btl->btl_max_rdma_size;
endpoint->btl_cache = NULL;
endpoint->btl_endpoint = btl_endpoints[p];
endpoint->btl_weight = 0;
endpoint->btl_alloc = btl->btl_alloc;
endpoint->btl_free = btl->btl_free;
endpoint->btl_prepare_src = btl->btl_prepare_src;
endpoint->btl_prepare_dst = btl->btl_prepare_dst;
endpoint->btl_send = btl->btl_send;
endpoint->btl_put = btl->btl_put;
endpoint->btl_get = btl->btl_get;
endpoint->btl_progress = btl->btl_component->btl_progress;
}
}
if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) {
size_t p;
bool found = false;
for(p=0; p<mca_pml_ob1.num_btl_progress; p++) {
if(mca_pml_ob1.btl_progress[p] == btl->btl_component->btl_progress) {
found = true;
break;
}
}
if(found == false) {
mca_pml_ob1.btl_progress[mca_pml_ob1.num_btl_progress] =
btl->btl_component->btl_progress;
mca_pml_ob1.num_btl_progress++;
}
}
}
free(btl_endpoints);
/* iterate back through procs and compute metrics for registered ob1s */
for(p=0; p<nprocs; p++) {
ompi_proc_t *proc = procs[p];
mca_pml_ob1_proc_t* proc_pml = proc->proc_pml;
double total_bandwidth = 0;
uint32_t latency = 0;
size_t n_index;
size_t n_size;
/* skip over procs w/ no ob1s registered */
if(NULL == proc_pml)
continue;
/* (1) determine the total bandwidth available across all ob1s
* note that we need to do this here, as we may already have ob1s configured
* (2) determine the highest priority ranking for latency
*/
n_size = mca_pml_ob1_ep_array_get_size(&proc_pml->btl_send);
for(n_index = 0; n_index < n_size; n_index++) {
mca_pml_ob1_endpoint_t* endpoint =
mca_pml_ob1_ep_array_get_index(&proc_pml->btl_send, n_index);
mca_btl_base_module_t* ob1 = endpoint->btl;
total_bandwidth += endpoint->btl->btl_bandwidth;
if(ob1->btl_latency > latency)
latency = ob1->btl_latency;
}
/* (1) set the weight of each ob1 as a percentage of overall bandwidth
* (2) copy all ob1 instances at the highest priority ranking into the
* list of ob1s used for first fragments
*/
for(n_index = 0; n_index < n_size; n_index++) {
mca_pml_ob1_endpoint_t* endpoint =
mca_pml_ob1_ep_array_get_index(&proc_pml->btl_send, n_index);
mca_btl_base_module_t *ob1 = endpoint->btl;
double weight;
/* compute weighting factor for this ob1 */
if(ob1->btl_bandwidth)
weight = endpoint->btl->btl_bandwidth / total_bandwidth;
else
weight = 1.0 / n_size;
endpoint->btl_weight = (int)(weight * 100);
/* check to see if this ob1 is already in the array of ob1s
* used for first fragments - if not add it.
*/
if(ob1->btl_latency == latency) {
mca_pml_ob1_endpoint_t* ep_new =
mca_pml_ob1_ep_array_insert(&proc_pml->btl_eager);
*ep_new = *endpoint;
}
/* check flags - is rdma prefered */
if(endpoint->btl->btl_flags & MCA_BTL_FLAGS_RDMA &&
proc->proc_arch == ompi_proc_local_proc->proc_arch) {
mca_pml_ob1_endpoint_t* rdma_ep = mca_pml_ob1_ep_array_insert(&proc_pml->btl_rdma);
*rdma_ep = *endpoint;
if(proc_pml->proc_rdma_offset < rdma_ep->btl_min_rdma_size) {
proc_pml->proc_rdma_offset = rdma_ep->btl_min_rdma_size;
}
}
}
}
return OMPI_SUCCESS;
procs[p]->proc_arch = ompi_proc_local()->proc_arch;
}
rc = mca_bml.bml_add_procs(
nprocs,
procs,
bml_endpoints,
&reachable
);
if(OMPI_SUCCESS != rc)
return rc;
rc = mca_bml.bml_register(
MCA_BTL_TAG_PML,
mca_pml_ob1_recv_frag_callback,
NULL);
/* initialize free list of receive buffers */
ompi_free_list_init(
&mca_pml_ob1.buffers,
sizeof(mca_pml_ob1_buffer_t) + mca_pml_ob1.eager_limit,
OBJ_CLASS(mca_pml_ob1_buffer_t),
mca_pml_ob1.free_list_num,
mca_pml_ob1.free_list_max,
mca_pml_ob1.free_list_inc,
NULL);
return rc;
}
/*
@ -433,53 +219,7 @@ int mca_pml_ob1_del_procs(ompi_proc_t** procs, size_t nprocs)
{
size_t p;
int rc;
for(p = 0; p < nprocs; p++) {
ompi_proc_t *proc = procs[p];
mca_pml_ob1_proc_t* proc_pml = proc->proc_pml;
size_t f_index, f_size;
size_t n_index, n_size;
/* notify each ob1 that the proc is going away */
f_size = mca_pml_ob1_ep_array_get_size(&proc_pml->btl_eager);
for(f_index = 0; f_index < f_size; f_index++) {
mca_pml_ob1_endpoint_t* endpoint = mca_pml_ob1_ep_array_get_index(&proc_pml->btl_eager, f_index);
mca_btl_base_module_t* ob1 = endpoint->btl;
rc = ob1->btl_del_procs(ob1,1,&proc,&endpoint->btl_endpoint);
if(OMPI_SUCCESS != rc) {
return rc;
}
/* remove this from next array so that we dont call it twice w/
* the same address pointer
*/
n_size = mca_pml_ob1_ep_array_get_size(&proc_pml->btl_eager);
for(n_index = 0; n_index < n_size; n_index++) {
mca_pml_ob1_endpoint_t* endpoint = mca_pml_ob1_ep_array_get_index(&proc_pml->btl_send, n_index);
if(endpoint->btl == ob1) {
memset(endpoint, 0, sizeof(mca_pml_ob1_endpoint_t));
break;
}
}
}
/* notify each ob1 that was not in the array of ob1s for first fragments */
n_size = mca_pml_ob1_ep_array_get_size(&proc_pml->btl_send);
for(n_index = 0; n_index < n_size; n_index++) {
mca_pml_ob1_endpoint_t* endpoint = mca_pml_ob1_ep_array_get_index(&proc_pml->btl_eager, n_index);
mca_btl_base_module_t* ob1 = endpoint->btl;
if (ob1 != 0) {
rc = ob1->btl_del_procs(ob1,1,&proc,&endpoint->btl_endpoint);
if(OMPI_SUCCESS != rc)
return rc;
}
}
/* do any required cleanup */
OBJ_RELEASE(proc_pml);
proc->proc_pml = NULL;
}
return OMPI_SUCCESS;
return mca_bml.bml_del_procs(nprocs, procs);
}
int mca_pml_ob1_component_fini(void)

Просмотреть файл

@ -43,15 +43,6 @@ extern "C" {
struct mca_pml_ob1_t {
mca_pml_base_module_t super;
mca_btl_base_component_t **btl_components;
size_t num_btl_components;
mca_btl_base_module_t** btl_modules;
size_t num_btl_modules;
mca_btl_base_component_progress_fn_t* btl_progress;
size_t num_btl_progress;
int priority;
int free_list_num; /* initial size of free list */
int free_list_max; /* maximum size of free list */
@ -76,6 +67,7 @@ struct mca_pml_ob1_t {
opal_list_t send_pending;
opal_list_t recv_pending;
opal_list_t rdma_pending;
bool enabled;
};
typedef struct mca_pml_ob1_t mca_pml_ob1_t;

Просмотреть файл

@ -29,12 +29,12 @@ extern "C" {
struct mca_pml_ob1_comm_proc_t {
opal_object_t super;
uint16_t expected_sequence; /**< send message sequence number - receiver side */
opal_object_t super;
uint16_t expected_sequence; /**< send message sequence number - receiver side */
volatile uint32_t send_sequence; /**< send side sequence number */
opal_list_t frags_cant_match; /**< out-of-order fragment queues */
opal_list_t specific_receives; /**< queues of unmatched specific receives */
opal_list_t unexpected_frags; /**< unexpected fragment queues */
opal_list_t frags_cant_match; /**< out-of-order fragment queues */
opal_list_t specific_receives; /**< queues of unmatched specific receives */
opal_list_t unexpected_frags; /**< unexpected fragment queues */
};
typedef struct mca_pml_ob1_comm_proc_t mca_pml_ob1_comm_proc_t;

Просмотреть файл

@ -30,6 +30,7 @@
#include "pml_ob1_recvreq.h"
#include "pml_ob1_rdmafrag.h"
#include "pml_ob1_recvfrag.h"
#include "mca/bml/base/base.h"
mca_pml_base_component_1_0_0_t mca_pml_ob1_component = {
@ -79,13 +80,6 @@ int mca_pml_ob1_component_open(void)
{
int param, value;
mca_pml_ob1.btl_components = NULL;
mca_pml_ob1.num_btl_components = 0;
mca_pml_ob1.btl_modules = NULL;
mca_pml_ob1.num_btl_modules = 0;
mca_pml_ob1.btl_progress = NULL;
mca_pml_ob1.num_btl_progress = 0;
mca_pml_ob1.free_list_num =
mca_pml_ob1_param_register_int("free_list_num", 256);
mca_pml_ob1.free_list_max =
@ -101,12 +95,14 @@ int mca_pml_ob1_component_open(void)
mca_pml_ob1.recv_pipeline_depth =
mca_pml_ob1_param_register_int("recv_pipeline_depth", 4);
mca_base_param_register_int("mpi", NULL, "leave_pinned", "leave_pinned", 0);
param = mca_base_param_find("mpi", NULL, "leave_pinned");
mca_base_param_lookup_int(param, &value);
mca_pml_ob1.leave_pinned = value;
return mca_btl_base_open();
mca_pml_ob1.enabled = false;
return mca_bml_base_open();
}
@ -114,10 +110,10 @@ int mca_pml_ob1_component_close(void)
{
int rc;
if( NULL == mca_pml_ob1.btl_components ) /* I was not selected */
return OMPI_SUCCESS;
if(!mca_pml_ob1.enabled)
return OMPI_SUCCESS; /* never selected.. return success.. */
if(OMPI_SUCCESS != (rc = mca_btl_base_close()))
if(OMPI_SUCCESS != (rc = mca_bml_base_close()))
return rc;
OBJ_DESTRUCT(&mca_pml_ob1.acks_pending);
@ -145,19 +141,6 @@ int mca_pml_ob1_component_close(void)
}
#endif
if(NULL != mca_pml_ob1.btl_components) {
free(mca_pml_ob1.btl_components);
mca_pml_ob1.btl_components = NULL;
}
if(NULL != mca_pml_ob1.btl_modules) {
free(mca_pml_ob1.btl_modules);
mca_pml_ob1.btl_modules = NULL;
}
if(NULL != mca_pml_ob1.btl_progress) {
free(mca_pml_ob1.btl_progress);
mca_pml_ob1.btl_progress = NULL;
}
return OMPI_SUCCESS;
}
@ -174,10 +157,10 @@ mca_pml_base_module_t* mca_pml_ob1_component_init(int* priority,
return NULL;
}
/* initialize NTLs */
if(OMPI_SUCCESS != mca_btl_base_select(enable_progress_threads,enable_mpi_threads))
return NULL;
if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads, enable_mpi_threads))
return NULL;
return &mca_pml_ob1.super;
}

Просмотреть файл

@ -22,40 +22,3 @@
#include "pml_ob1_endpoint.h"
/*
 * OBJ constructor: bring a freshly allocated ep_array to the empty state —
 * no backing storage, zero elements, round-robin cursor at slot 0.
 */
static void mca_pml_ob1_ep_array_construct(mca_pml_ob1_ep_array_t* array)
{
    array->arr_reserve = 0;
    array->arr_index = 0;
    array->arr_size = 0;
    array->arr_endpoints = NULL;
}
/*
 * OBJ destructor: release the backing storage.  free(NULL) is defined as a
 * no-op, so no guard is needed for a never-reserved array.
 */
static void mca_pml_ob1_ep_array_destruct(mca_pml_ob1_ep_array_t* array)
{
    free(array->arr_endpoints);
}
/* Register mca_pml_ob1_ep_array_t with the OPAL class system so that
 * OBJ_NEW/OBJ_CONSTRUCT and OBJ_RELEASE/OBJ_DESTRUCT invoke the
 * construct/destruct hooks defined in this file. */
OBJ_CLASS_INSTANCE(
    mca_pml_ob1_ep_array_t,
    opal_object_t,
    mca_pml_ob1_ep_array_construct,
    mca_pml_ob1_ep_array_destruct
);
/**
 * Grow the array's backing store to hold at least @size endpoints.
 * Newly added slots are zero-filled; existing contents are preserved.
 *
 * @param array (IN)  endpoint array to grow
 * @param size  (IN)  requested capacity, in elements
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if allocation fails
 *         (in which case the array is left unchanged).
 */
int mca_pml_ob1_ep_array_reserve(mca_pml_ob1_ep_array_t* array, size_t size)
{
    size_t old_len = sizeof(mca_pml_ob1_endpoint_t)*array->arr_reserve;
    size_t new_len = sizeof(mca_pml_ob1_endpoint_t)*size;
    mca_pml_ob1_endpoint_t* endpoints;

    if(old_len >= new_len)
        return OMPI_SUCCESS;

    /* realloc into a temporary: assigning the result directly to
     * array->arr_endpoints would leak the original buffer on failure
     * and leave the array pointing at NULL with stale bookkeeping. */
    endpoints = (mca_pml_ob1_endpoint_t*)realloc(array->arr_endpoints, new_len);
    if(NULL == endpoints)
        return OMPI_ERR_OUT_OF_RESOURCE;

    /* zero-fill only the newly added tail */
    memset((unsigned char*)endpoints + old_len, 0, new_len-old_len);
    array->arr_endpoints = endpoints;
    array->arr_reserve = size;
    return OMPI_SUCCESS;
}

Просмотреть файл

@ -25,214 +25,6 @@
extern "C" {
#endif
/**
 * A data structure associated with a ompi_proc_t that caches
 * addressing/scheduling attributes for a specific BTL instance
 * that can be used to reach the process.
 */
struct mca_pml_ob1_endpoint_t {
    int btl_weight;                    /**< BTL weight for scheduling */
    int btl_flags;                     /**< support for put/get? */
    size_t btl_eager_limit;            /**< BTL eager limit */
    size_t btl_min_send_size;          /**< BTL min send size */
    size_t btl_max_send_size;          /**< BTL max send size */
    size_t btl_min_rdma_size;          /**< BTL min rdma size */
    size_t btl_max_rdma_size;          /**< BTL max rdma size */
    struct mca_btl_base_module_t *btl; /**< BTL module */
    struct mca_btl_base_endpoint_t* btl_endpoint; /**< BTL addressing info */
    struct mca_btl_base_descriptor_t* btl_cache;  /**< one-deep descriptor cache
                                                       used by the DES_ALLOC /
                                                       DES_RETURN macros below */

    /* BTL function table: entry points copied from the module so callers
     * (e.g. the DES_ALLOC macro) can invoke them directly */
    mca_btl_base_module_alloc_fn_t btl_alloc;
    mca_btl_base_module_free_fn_t btl_free;
    mca_btl_base_module_send_fn_t btl_send;
    mca_btl_base_module_prepare_fn_t btl_prepare_src;
    mca_btl_base_module_prepare_fn_t btl_prepare_dst;
    mca_btl_base_module_put_fn_t btl_put;
    mca_btl_base_module_get_fn_t btl_get;
    mca_btl_base_component_progress_fn_t btl_progress;
};
typedef struct mca_pml_ob1_endpoint_t mca_pml_ob1_endpoint_t;
/**
 * A dynamically growable array of mca_pml_ob1_endpoint_t instances.
 * Maintains an index into the array that is used for round-robin
 * scheduling across contents.
 */
struct mca_pml_ob1_ep_array_t {
    opal_object_t super;
    size_t arr_size;     /**< number of elements in use */
    size_t arr_reserve;  /**< allocated capacity of arr_endpoints, in elements */
    size_t arr_index;    /**< round-robin cursor (next slot get_next returns) */
    mca_pml_ob1_endpoint_t* arr_endpoints; /**< array of btl endpoints */
};
typedef struct mca_pml_ob1_ep_array_t mca_pml_ob1_ep_array_t;

OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_ob1_ep_array_t);
/**
* If required, reallocate (grow) the array to the indicate size.
*
* @param array (IN)
* @param size (IN)
*/
int mca_pml_ob1_ep_array_reserve(mca_pml_ob1_ep_array_t*, size_t);
/** Return the number of endpoints currently stored in the array. */
static inline size_t mca_pml_ob1_ep_array_get_size(mca_pml_ob1_ep_array_t* array)
{
    size_t count = array->arr_size;
    return count;
}
/**
* Grow the array if required, and set the size.
*
* @param array (IN)
* @param size (IN)
*/
/**
 * Grow the array if required, and set the logical size.
 *
 * Fix: the reserve check must compare the REQUESTED size against the
 * current reservation.  The old code compared arr_size (which can never
 * exceed arr_reserve through normal operation), so setting a size larger
 * than the reservation skipped the reallocation and later insert/get
 * calls indexed past the end of the buffer.
 *
 * @param array (IN)  endpoint array
 * @param size  (IN)  new logical size, in elements
 */
static inline void mca_pml_ob1_ep_array_set_size(mca_pml_ob1_ep_array_t* array, size_t size)
{
    if(size > array->arr_reserve)
        mca_pml_ob1_ep_array_reserve(array, size);
    array->arr_size = size;
}
/**
* Grow the array size by one and return the item at that index.
*
* @param array (IN)
*/
/**
 * Grow the logical size by one and return the slot at that index.
 * Capacity must already have been reserved; in debug builds an
 * out-of-reserve insert is reported and NULL returned.
 */
static inline mca_pml_ob1_endpoint_t* mca_pml_ob1_ep_array_insert(mca_pml_ob1_ep_array_t* array)
{
#if OMPI_ENABLE_DEBUG
    if(array->arr_size >= array->arr_reserve) {
        /* arr_size/arr_reserve are size_t; printing them with %d is
         * undefined behavior — use %lu with an explicit cast */
        opal_output(0, "mca_pml_ob1_ep_array_insert: invalid array index %lu >= %lu",
            (unsigned long)array->arr_size, (unsigned long)array->arr_reserve);
        return NULL;
    }
#endif
    return &array->arr_endpoints[array->arr_size++];
}
/**
* Return an array item at the specified index.
*
* @param array (IN)
* @param index (IN)
*/
/**
 * Return the array item at the specified index.
 * In debug builds an out-of-range index is reported and NULL returned.
 *
 * @param array (IN)
 * @param index (IN)
 */
static inline mca_pml_ob1_endpoint_t* mca_pml_ob1_ep_array_get_index(mca_pml_ob1_ep_array_t* array, size_t index)
{
#if OMPI_ENABLE_DEBUG
    if(index >= array->arr_size) {
        /* index/arr_size are size_t; printing them with %d is
         * undefined behavior — use %lu with an explicit cast */
        opal_output(0, "mca_pml_ob1_ep_array_get_index: invalid array index %lu >= %lu",
            (unsigned long)index, (unsigned long)array->arr_size);
        return NULL;
    }
#endif
    return &array->arr_endpoints[index];
}
/**
* Return the next LRU index in the array.
*
* @param array (IN)
* @param index (IN)
*/
/**
 * Round-robin accessor: return the endpoint at the current cursor and
 * advance it, wrapping back to slot 0 after the last element.
 */
static inline mca_pml_ob1_endpoint_t* mca_pml_ob1_ep_array_get_next(mca_pml_ob1_ep_array_t* array)
{
    mca_pml_ob1_endpoint_t* next;
#if OMPI_ENABLE_DEBUG
    if(array->arr_size == 0) {
        opal_output(0, "mca_pml_ob1_ep_array_get_next: invalid array size");
        return 0;
    }
#endif
    next = array->arr_endpoints + array->arr_index;
    array->arr_index++;
    if(array->arr_index == array->arr_size) {
        array->arr_index = 0;
    }
    return next;
}
/**
* Locate an element in the array
*
* @param array (IN)
* @param index (IN)
*/
/**
 * Linear scan for the endpoint bound to the given BTL module.
 *
 * @param array (IN)  endpoint array to search
 * @param btl   (IN)  BTL module to match against each entry's btl field
 * @return matching endpoint, or NULL if none is present
 */
static inline mca_pml_ob1_endpoint_t* mca_pml_ob1_ep_array_find(
    mca_pml_ob1_ep_array_t* array, struct mca_btl_base_module_t* btl)
{
    size_t pos;
    for(pos = 0; pos < array->arr_size; pos++) {
        mca_pml_ob1_endpoint_t* candidate = &array->arr_endpoints[pos];
        if(candidate->btl == btl) {
            return candidate;
        }
    }
    return NULL;
}
/**
 * Allocate a descriptor for a control message.
 *
 * Tries the endpoint's one-deep descriptor cache (btl_cache) first; on a
 * miss a fresh descriptor is allocated from the BTL, sized for a PML
 * header plus the maximum number of segments.  Only the payload length
 * of the first source segment is set here; the caller fills the rest.
 */
#if OMPI_HAVE_THREAD_SUPPORT
/* Threaded variant: claim the cached descriptor with an atomic
 * compare-and-swap.
 * NOTE(review): on cmpset failure the cache is cleared with a plain store
 * and the previously-read descriptor is still used, while cmpset success
 * falls into the fresh-allocation branch — this looks inverted/racy;
 * confirm the intended ownership protocol for endpoint->btl_cache. */
#define MCA_PML_OB1_ENDPOINT_DES_ALLOC(endpoint, descriptor, size) \
do { \
if(NULL != (descriptor = endpoint->btl_cache)) { \
/* atomically acquire the cached descriptor */ \
if(opal_atomic_cmpset_ptr(&endpoint->btl_cache, descriptor, NULL) == 0) { \
endpoint->btl_cache = NULL; \
} else { \
descriptor = endpoint->btl_alloc(endpoint->btl, sizeof(mca_pml_ob1_hdr_t) + \
MCA_BTL_DES_MAX_SEGMENTS * sizeof(mca_btl_base_segment_t)); \
} \
} else { \
descriptor = endpoint->btl_alloc(endpoint->btl, sizeof(mca_pml_ob1_hdr_t) + \
MCA_BTL_DES_MAX_SEGMENTS * sizeof(mca_btl_base_segment_t)); \
} \
descriptor->des_src->seg_len = size; \
} while(0)
#else
/* Single-threaded variant: plain cache check, no atomics needed. */
#define MCA_PML_OB1_ENDPOINT_DES_ALLOC(endpoint, descriptor, size) \
do { \
if(NULL != (descriptor = endpoint->btl_cache)) { \
endpoint->btl_cache = NULL; \
} else { \
descriptor = endpoint->btl_alloc(endpoint->btl, sizeof(mca_pml_ob1_hdr_t) + \
MCA_BTL_DES_MAX_SEGMENTS * sizeof(mca_btl_base_segment_t)); \
} \
descriptor->des_src->seg_len = size; \
} while(0)
#endif
/**
 * Return a descriptor.
 *
 * Stashes the descriptor back into the endpoint's one-deep cache when the
 * slot is empty; otherwise hands it back to the BTL via btl_free.
 */
#if OMPI_HAVE_THREAD_SUPPORT
/* Threaded variant: publish into the cache with an atomic compare-and-swap;
 * if another thread filled the slot first, free the descriptor instead. */
#define MCA_PML_OB1_ENDPOINT_DES_RETURN(endpoint, descriptor) \
do { \
if(NULL == endpoint->btl_cache) { \
if(opal_atomic_cmpset_ptr(&endpoint->btl_cache,NULL,descriptor) == 0) { \
endpoint->btl_free(endpoint->btl,descriptor); \
} \
} else { \
endpoint->btl_free(endpoint->btl,descriptor); \
} \
} while(0)
#else
/* Single-threaded variant: plain cache store, no atomics needed. */
#define MCA_PML_OB1_ENDPOINT_DES_RETURN(endpoint, descriptor) \
do { \
if(NULL == endpoint->btl_cache) { \
endpoint->btl_cache = descriptor; \
} else { \
endpoint->btl_free(endpoint->btl,descriptor); \
} \
} while(0)
#endif
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -32,7 +32,7 @@ int mca_pml_ob1_isend_init(void *buf,
ompi_request_t ** request)
{
int rc;
mca_pml_ob1_send_request_t *sendreq = NULL;
MCA_PML_OB1_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc);
if (rc != OMPI_SUCCESS)
@ -64,7 +64,7 @@ int mca_pml_ob1_isend(void *buf,
MCA_PML_OB1_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc);
if (rc != OMPI_SUCCESS)
return rc;
MCA_PML_OB1_SEND_REQUEST_INIT(sendreq,
buf,
count,
@ -91,14 +91,14 @@ int mca_pml_ob1_send(void *buf,
MCA_PML_OB1_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc);
if (rc != OMPI_SUCCESS)
return rc;
MCA_PML_OB1_SEND_REQUEST_INIT(sendreq,
buf,
count,
datatype,
dst, tag,
comm, sendmode, false);
buf,
count,
datatype,
dst, tag,
comm, sendmode, false);
MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc);
if (rc != OMPI_SUCCESS) {
MCA_PML_OB1_FREE((ompi_request_t **) & sendreq);

Просмотреть файл

@ -23,28 +23,21 @@
static void mca_pml_ob1_proc_construct(mca_pml_ob1_proc_t* proc)
{
proc->proc_ompi = NULL;
proc->proc_rdma_offset = 0;
OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);
OBJ_CONSTRUCT(&proc->btl_eager, mca_pml_ob1_ep_array_t);
OBJ_CONSTRUCT(&proc->btl_send, mca_pml_ob1_ep_array_t);
OBJ_CONSTRUCT(&proc->btl_rdma, mca_pml_ob1_ep_array_t);
proc->base.proc_ompi = NULL;
OBJ_CONSTRUCT(&proc->base.proc_lock, opal_mutex_t);
}
static void mca_pml_ob1_proc_destruct(mca_pml_ob1_proc_t* proc)
{
OBJ_DESTRUCT(&proc->proc_lock);
OBJ_DESTRUCT(&proc->btl_eager);
OBJ_DESTRUCT(&proc->btl_send);
OBJ_DESTRUCT(&proc->btl_rdma);
OBJ_DESTRUCT(&proc->base.proc_lock);
}
OBJ_CLASS_INSTANCE(
mca_pml_ob1_proc_t,
opal_object_t,
mca_pml_ob1_proc_construct,
mca_pml_ob1_proc_destruct
opal_list_item_t,
mca_pml_ob1_proc_construct,
mca_pml_ob1_proc_destruct
);

Просмотреть файл

@ -30,22 +30,13 @@ extern "C" {
#endif
/**
* Structure associated w/ ompi_proc_t that contains data specific
* to the PML.
* to the PML. Note that this name is not PML specific.
*/
struct mca_pml_proc_t {
opal_object_t super;
ompi_proc_t *proc_ompi; /**< back-pointer to ompi_proc_t */
opal_mutex_t proc_lock; /**< lock to protect against concurrent access */
int proc_flags; /**< prefered method of accessing this peer */
mca_pml_ob1_ep_array_t btl_eager; /**< array of endpoints to use for first fragments */
mca_pml_ob1_ep_array_t btl_send; /**< array of endpoints to use for remaining fragments */
mca_pml_ob1_ep_array_t btl_rdma; /**< array of endpoints that support (prefer) rdma */
size_t proc_rdma_offset; /**< max of min rdma size for available rmda btls */
struct mca_pml_ob1_proc_t {
mca_pml_proc_t base;
};
typedef struct mca_pml_proc_t mca_pml_ob1_proc_t;
OMPI_COMP_EXPORT OBJ_CLASS_DECLARATION(mca_pml_ob1_proc_t);
typedef struct mca_pml_ob1_proc_t mca_pml_ob1_proc_t;
OMPI_COMP_EXPORT extern opal_class_t mca_pml_ob1_proc_t_class;
/**
* Return the mca_pml_proc_t instance cached in the communicators local group.
@ -55,48 +46,48 @@ OMPI_COMP_EXPORT OBJ_CLASS_DECLARATION(mca_pml_ob1_proc_t);
* @return mca_pml_proc_t instance
*/
static inline mca_pml_ob1_proc_t* mca_pml_ob1_proc_lookup_local(ompi_communicator_t* comm, int rank)
{
return comm->c_local_group->grp_proc_pointers[rank]->proc_pml;
}
/* static inline mca_pml_ob1_proc_t* mca_pml_ob1_proc_lookup_local(ompi_communicator_t* comm, int rank) */
/* { */
/* return (mca_pml_ob1_proc_t*) comm->c_local_group->grp_proc_pointers[rank]->proc_pml; */
/* } */
/**
* Return the mca_pml_proc_t instance cached on the communicators remote group.
*
* @param comm Communicator
* @param rank Peer rank
* @return mca_pml_proc_t instance
*/
/* /\** */
/* * Return the mca_pml_proc_t instance cached on the communicators remote group. */
/* * */
/* * @param comm Communicator */
/* * @param rank Peer rank */
/* * @return mca_pml_proc_t instance */
/* *\/ */
static inline mca_pml_ob1_proc_t* mca_pml_ob1_proc_lookup_remote(ompi_communicator_t* comm, int rank)
{
return comm->c_pml_procs[rank];
}
/* static inline mca_pml_ob1_proc_t* mca_pml_ob1_proc_lookup_remote(ompi_communicator_t* comm, int rank) */
/* { */
/* return (mca_pml_ob1_proc_t*) comm->c_pml_procs[rank]; */
/* } */
/**
* Return the mca_btl_peer_t instance corresponding to the process/btl combination.
*
* @param comm Communicator
* @param rank Peer rank
* @return mca_pml_proc_t instance
*/
/* /\** */
/* * Return the mca_btl_peer_t instance corresponding to the process/btl combination. */
/* * */
/* * @param comm Communicator */
/* * @param rank Peer rank */
/* * @return mca_pml_proc_t instance */
/* *\/ */
static inline struct mca_btl_base_endpoint_t* mca_pml_ob1_proc_lookup_remote_endpoint(
ompi_communicator_t* comm,
int rank,
struct mca_btl_base_module_t* btl)
{
mca_pml_ob1_proc_t* proc = comm->c_pml_procs[rank];
size_t i, size = mca_pml_ob1_ep_array_get_size(&proc->btl_eager);
mca_pml_ob1_endpoint_t* endpoint = proc->btl_eager.arr_endpoints;
for(i = 0; i < size; i++) {
if(endpoint->btl == btl) {
return endpoint->btl_endpoint;
}
endpoint++;
}
return NULL;
}
/* static inline struct mca_btl_base_endpoint_t* mca_pml_ob1_proc_lookup_remote_endpoint( */
/* ompi_communicator_t* comm, */
/* int rank, */
/* struct mca_btl_base_module_t* btl) */
/* { */
/* mca_pml_ob1_proc_t* proc = (mca_pml_ob1_proc_t*) comm->c_pml_procs[rank]; */
/* size_t i, size = mca_pml_ob1_ep_array_get_size(&proc->btl_eager); */
/* mca_pml_ob1_endpoint_t* endpoint = proc->btl_eager.arr_endpoints; */
/* for(i = 0; i < size; i++) { */
/* if(endpoint->btl == btl) { */
/* return endpoint->btl_endpoint; */
/* } */
/* endpoint++; */
/* } */
/* return NULL; */
/* } */
#if defined(c_plusplus) || defined(__cplusplus)

Просмотреть файл

@ -18,22 +18,10 @@
#include "pml_ob1.h"
#include "pml_ob1_sendreq.h"
#include "mca/bml/base/base.h"
int mca_pml_ob1_progress(void)
{
size_t i;
int count = 0;
/*
* Progress each of the PTL modules
*/
for(i=0; i<mca_pml_ob1.num_btl_progress; i++) {
int rc = mca_pml_ob1.btl_progress[i]();
if(rc > 0) {
count += rc;
}
}
return count;
return mca_bml.bml_progress();
}

Просмотреть файл

@ -37,8 +37,8 @@ struct mca_pml_ob1_rdma_frag_t {
mca_pml_ob1_rdma_state_t rdma_state;
size_t rdma_length;
mca_btl_base_segment_t rdma_segs[MCA_BTL_DES_MAX_SEGMENTS];
struct mca_pml_ob1_endpoint_t* rdma_ep;
struct mca_pml_ob1_send_request_t* rdma_req;
struct mca_bml_base_endpoint_t* rdma_ep;
};
typedef struct mca_pml_ob1_rdma_frag_t mca_pml_ob1_rdma_frag_t;

Просмотреть файл

@ -55,10 +55,10 @@ OBJ_CLASS_INSTANCE(
*/
void mca_pml_ob1_recv_frag_callback(
mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* des,
void* cbdata)
mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* des,
void* cbdata)
{
mca_btl_base_segment_t* segments = des->des_dst;
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
@ -67,14 +67,14 @@ void mca_pml_ob1_recv_frag_callback(
}
switch(hdr->hdr_common.hdr_type) {
case MCA_PML_OB1_HDR_TYPE_MATCH:
case MCA_PML_OB1_HDR_TYPE_RNDV:
{
mca_pml_ob1_recv_frag_match(btl,&hdr->hdr_match,segments,des->des_dst_cnt);
case MCA_PML_OB1_HDR_TYPE_MATCH:
case MCA_PML_OB1_HDR_TYPE_RNDV:
{
mca_pml_ob1_recv_frag_match(btl, &hdr->hdr_match, segments,des->des_dst_cnt);
break;
}
case MCA_PML_OB1_HDR_TYPE_ACK:
{
}
case MCA_PML_OB1_HDR_TYPE_ACK:
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)
hdr->hdr_ack.hdr_src_req.pval;
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;
@ -84,23 +84,24 @@ void mca_pml_ob1_recv_frag_callback(
#endif
MCA_PML_OB1_SEND_REQUEST_ADVANCE(sendreq);
break;
}
case MCA_PML_OB1_HDR_TYPE_FRAG:
{
}
case MCA_PML_OB1_HDR_TYPE_FRAG:
{
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)
hdr->hdr_frag.hdr_dst_req.pval;
mca_pml_ob1_recv_request_progress(recvreq,btl,segments,des->des_dst_cnt);
mca_pml_ob1_recv_request_progress(recvreq,segments,des->des_dst_cnt);
break;
}
case MCA_PML_OB1_HDR_TYPE_PUT:
{
}
case MCA_PML_OB1_HDR_TYPE_PUT:
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)
hdr->hdr_rdma.hdr_src.pval;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
mca_pml_ob1_send_request_put(sendreq,btl,&hdr->hdr_rdma);
break;
}
case MCA_PML_OB1_HDR_TYPE_FIN:
{
}
case MCA_PML_OB1_HDR_TYPE_FIN:
{
mca_btl_base_descriptor_t* dst = (mca_btl_base_descriptor_t*)
hdr->hdr_fin.hdr_dst.pval;
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)dst->des_cbdata;
@ -109,15 +110,16 @@ void mca_pml_ob1_recv_frag_callback(
btl->btl_free(btl,dst);
recvreq->fin2[recvreq->fin_index] = get_profiler_timestamp();
recvreq->fin_index++;
mca_pml_ob1_recv_request_progress(recvreq,btl,segments,des->des_dst_cnt);
mca_pml_ob1_recv_request_progress(recvreq,segments,des->des_dst_cnt);
#else
mca_pml_ob1_recv_request_progress(recvreq,btl,segments,des->des_dst_cnt);
mca_pml_ob1_recv_request_progress(recvreq,segments,des->des_dst_cnt);
btl->btl_free(btl,dst);
#endif
break;
}
default:
break;
}
default:
break;
}
}
@ -409,10 +411,10 @@ static bool mca_pml_ob1_check_cantmatch_for_match(
* - this routine may be called simultaneously by more than one thread
*/
int mca_pml_ob1_recv_frag_match(
mca_btl_base_module_t* btl,
mca_pml_ob1_match_hdr_t *hdr,
mca_btl_base_segment_t* segments,
size_t num_segments)
mca_btl_base_module_t *btl,
mca_pml_ob1_match_hdr_t *hdr,
mca_btl_base_segment_t* segments,
size_t num_segments)
{
/* local variables */
uint16_t next_msg_seq_expected, frag_msg_seq;
@ -421,7 +423,6 @@ int mca_pml_ob1_recv_frag_match(
mca_pml_ob1_comm_t *comm;
mca_pml_ob1_comm_proc_t *proc;
bool additional_match=false;
bool is_probe = false;
opal_list_t additional_matches;
int rc;
@ -480,14 +481,12 @@ int mca_pml_ob1_recv_frag_match(
/* if match found, process data */
if (match) {
/*
* update delivered sequence number information, if needed.
*/
if( (match->req_recv.req_base.req_type == MCA_PML_REQUEST_PROBE) ||
match->req_recv.req_base.req_type == MCA_PML_REQUEST_IPROBE) {
if( (match->req_recv.req_base.req_type == MCA_PML_REQUEST_PROBE) ) {
/* Match a probe, rollback the next expected sequence number */
is_probe = true;
(proc->expected_sequence)--;
}
} else {
@ -499,7 +498,7 @@ int mca_pml_ob1_recv_frag_match(
OPAL_THREAD_UNLOCK(&comm->matching_lock);
return rc;
}
MCA_PML_OB1_RECV_FRAG_INIT(frag,btl,hdr,segments,num_segments);
MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments);
opal_list_append( &proc->unexpected_frags, (opal_list_item_t *)frag );
}
@ -524,7 +523,7 @@ int mca_pml_ob1_recv_frag_match(
OPAL_THREAD_UNLOCK(&comm->matching_lock);
return rc;
}
MCA_PML_OB1_RECV_FRAG_INIT(frag,btl,hdr,segments,num_segments);
MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments);
opal_list_append(&proc->frags_cant_match, (opal_list_item_t *)frag);
}
@ -534,28 +533,14 @@ int mca_pml_ob1_recv_frag_match(
/* release matching lock before processing fragment */
if(match != NULL) {
MCA_PML_OB1_RECV_REQUEST_MATCHED(match, hdr);
if (is_probe == false) {
mca_pml_ob1_recv_request_progress(match,btl,segments,num_segments);
} else {
/* mark probe as complete */
OPAL_THREAD_LOCK(&ompi_request_lock);
match->req_recv.req_base.req_ompi.req_status._count = match->req_recv.req_bytes_packed;
match->req_recv.req_base.req_pml_complete = true;
match->req_recv.req_base.req_ompi.req_complete = true;
if(ompi_request_waiting) {
opal_condition_broadcast(&ompi_request_cond);
}
OPAL_THREAD_UNLOCK(&ompi_request_lock);
/* retry the match */
mca_pml_ob1_recv_frag_match(btl,hdr,segments,num_segments);
}
mca_pml_ob1_recv_request_progress(match,segments,num_segments);
}
if(additional_match) {
opal_list_item_t* item;
while(NULL != (item = opal_list_remove_first(&additional_matches))) {
mca_pml_ob1_recv_frag_t* frag = (mca_pml_ob1_recv_frag_t*)item;
MCA_PML_OB1_RECV_REQUEST_MATCHED(frag->request, hdr);
mca_pml_ob1_recv_request_progress(frag->request,frag->btl,frag->segments,frag->num_segments);
mca_pml_ob1_recv_request_progress(frag->request,frag->segments,frag->num_segments);
MCA_PML_OB1_RECV_FRAG_RETURN(frag);
}
}

Просмотреть файл

@ -21,6 +21,7 @@
#define MCA_PML_OB1_RECVFRAG_H
#include "mca/btl/btl.h"
#include "mca/bml/bml.h"
#include "pml_ob1_hdr.h"
struct mca_pml_ob1_buffer_t {
@ -34,7 +35,6 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_buffer_t);
struct mca_pml_ob1_recv_frag_t {
opal_list_item_t super;
mca_btl_base_module_t* btl;
mca_pml_ob1_hdr_t hdr;
struct mca_pml_ob1_recv_request_t* request;
size_t num_segments;
@ -46,25 +46,23 @@ typedef struct mca_pml_ob1_recv_frag_t mca_pml_ob1_recv_frag_t;
OBJ_CLASS_DECLARATION(mca_pml_ob1_recv_frag_t);
#define MCA_PML_OB1_RECV_FRAG_ALLOC(frag,rc) \
#define MCA_PML_OB1_RECV_FRAG_ALLOC(frag,rc) \
do { \
opal_list_item_t* item; \
OMPI_FREE_LIST_WAIT(&mca_pml_ob1.recv_frags, item, rc); \
frag = (mca_pml_ob1_recv_frag_t*)item; \
OMPI_FREE_LIST_WAIT(&mca_pml_ob1.recv_frags, item, rc); \
frag = (mca_pml_ob1_recv_frag_t*)item; \
} while(0)
#define MCA_PML_OB1_RECV_FRAG_INIT(frag,btl,hdr,segs,cnt) \
#define MCA_PML_OB1_RECV_FRAG_INIT(frag, hdr,segs,cnt) \
do { \
size_t i; \
mca_btl_base_segment_t* macro_segments = frag->segments; \
mca_btl_base_segment_t* macro_segments = frag->segments; \
mca_pml_ob1_buffer_t** buffers = frag->buffers; \
\
/* init recv_frag */ \
frag->btl = btl; \
/* init recv_frag */ \
frag->hdr = *(mca_pml_ob1_hdr_t*)hdr; \
frag->num_segments = cnt; \
\
/* copy over data */ \
for(i=0; i<cnt; i++) { \
opal_list_item_t* item; \
@ -72,8 +70,8 @@ do { \
OMPI_FREE_LIST_WAIT(&mca_pml_ob1.buffers, item, rc); \
buff = (mca_pml_ob1_buffer_t*)item; \
buffers[i] = buff; \
macro_segments[i].seg_addr.pval = buff->addr; \
macro_segments[i].seg_len = segs[i].seg_len; \
macro_segments[i].seg_addr.pval = buff->addr; \
macro_segments[i].seg_len = segs[i].seg_len; \
memcpy(buff->addr, \
segs[i].seg_addr.pval, \
segs[i].seg_len); \
@ -82,7 +80,7 @@ do { \
} while(0)
#define MCA_PML_OB1_RECV_FRAG_RETURN(frag) \
#define MCA_PML_OB1_RECV_FRAG_RETURN(frag) \
do { \
size_t i; \
\
@ -93,8 +91,8 @@ do { \
} \
frag->num_segments = 0; \
\
/* return recv_frag */ \
OMPI_FREE_LIST_RETURN(&mca_pml_ob1.recv_frags, \
/* return recv_frag */ \
OMPI_FREE_LIST_RETURN(&mca_pml_ob1.recv_frags, \
(opal_list_item_t*)frag); \
} while(0)
@ -104,11 +102,11 @@ do { \
*/
OMPI_DECLSPEC void mca_pml_ob1_recv_frag_callback(
mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata
);
mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata
);
/**
* Match incoming recv_frags against posted receives.
@ -121,10 +119,10 @@ OMPI_DECLSPEC void mca_pml_ob1_recv_frag_callback(
* @return OMPI_SUCCESS or error status on failure.
*/
OMPI_DECLSPEC int mca_pml_ob1_recv_frag_match(
mca_btl_base_module_t* btl,
mca_pml_ob1_match_hdr_t *hdr,
mca_btl_base_segment_t* segments,
size_t num_segments);
mca_btl_base_module_t* btl,
mca_pml_ob1_match_hdr_t *hdr,
mca_btl_base_segment_t* segments,
size_t num_segments);
#if defined(c_plusplus) || defined(__cplusplus)

Просмотреть файл

@ -17,13 +17,14 @@
#include "ompi_config.h"
#include "mca/pml/pml.h"
#include "mca/bml/bml.h"
#include "mca/btl/btl.h"
#include "mca/mpool/mpool.h"
#include "pml_ob1_comm.h"
#include "pml_ob1_recvreq.h"
#include "pml_ob1_recvfrag.h"
#include "pml_ob1_sendreq.h"
#include "mca/bml/base/base.h"
static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc(
mca_pml_ob1_recv_request_t* request, mca_pml_ob1_comm_proc_t* proc);
@ -102,13 +103,13 @@ OBJ_CLASS_INSTANCE(
*/
static void mca_pml_ob1_send_ctl_complete(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status)
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status)
{
mca_pml_ob1_endpoint_t* endpoint = (mca_pml_ob1_endpoint_t*)des->des_cbdata;
MCA_PML_OB1_ENDPOINT_DES_RETURN(endpoint, des);
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des);
}
@ -120,9 +121,10 @@ static void mca_pml_ob1_recv_request_ack(
mca_pml_ob1_recv_request_t* recvreq,
mca_pml_ob1_rendezvous_hdr_t* hdr)
{
mca_pml_ob1_proc_t* proc = recvreq->req_proc;
mca_pml_ob1_endpoint_t* ep;
ompi_proc_t* proc = (ompi_proc_t*) recvreq->req_proc;
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
mca_btl_base_descriptor_t* des;
mca_bml_base_btl_t* bml_btl;
mca_pml_ob1_recv_frag_t* frag;
mca_pml_ob1_ack_hdr_t* ack;
int rc;
@ -131,12 +133,12 @@ static void mca_pml_ob1_recv_request_ack(
if(NULL == proc) {
ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(
recvreq->req_recv.req_base.req_comm, hdr->hdr_match.hdr_src);
proc = recvreq->req_proc = ompi_proc->proc_pml;
proc = recvreq->req_proc = ompi_proc;
}
ep = mca_pml_ob1_ep_array_get_next(&proc->btl_eager);
bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager);
/* allocate descriptor */
MCA_PML_OB1_ENDPOINT_DES_ALLOC(ep, des, sizeof(mca_pml_ob1_ack_hdr_t));
MCA_BML_BASE_BTL_DES_ALLOC(bml_btl, des, sizeof(mca_pml_ob1_ack_hdr_t));
if(NULL == des) {
goto retry;
}
@ -161,7 +163,7 @@ static void mca_pml_ob1_recv_request_ack(
|| mca_pml_ob1.leave_pinned)) { /* BUG here! hdr_flags are 0! */
struct mca_mpool_base_reg_mpool_t *reg = recvreq->req_chunk->mpools;
while(reg->mpool != NULL) {
if(NULL != mca_pml_ob1_ep_array_find(&proc->btl_rdma,(mca_btl_base_module_t*) reg->user_data)) {
if(NULL != mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma,(mca_btl_base_module_t*) reg->user_data)) {
recvreq->req_mpool = reg;
break;
}
@ -174,18 +176,18 @@ static void mca_pml_ob1_recv_request_ack(
* - rdma devices are available
*/
if(NULL == recvreq->req_mpool && !mca_pml_ob1.leave_pinned) {
if(recvreq->req_recv.req_bytes_packed > proc->proc_rdma_offset &&
mca_pml_ob1_ep_array_get_size(&proc->btl_rdma) &&
if(recvreq->req_recv.req_bytes_packed > bml_endpoint->btl_rdma_offset &&
mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma) &&
ompi_convertor_need_buffers(&recvreq->req_recv.req_convertor) == 0) {
/* use convertor to figure out the rdma offset for this request */
recvreq->req_rdma_offset = proc->proc_rdma_offset;
recvreq->req_rdma_offset = bml_endpoint->btl_rdma_offset;
if(recvreq->req_rdma_offset < hdr->hdr_frag_length) {
recvreq->req_rdma_offset = hdr->hdr_frag_length;
}
ompi_convertor_set_position(
&recvreq->req_recv.req_convertor,
&recvreq->req_rdma_offset);
&recvreq->req_recv.req_convertor,
&recvreq->req_rdma_offset);
ack->hdr_rdma_offset = recvreq->req_rdma_offset;
} else {
recvreq->req_rdma_offset = recvreq->req_recv.req_bytes_packed;
@ -208,11 +210,11 @@ static void mca_pml_ob1_recv_request_ack(
/* initialize descriptor */
des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
des->des_cbfunc = mca_pml_ob1_send_ctl_complete;
des->des_cbdata = ep;
des->des_cbdata = bml_btl;
rc = ep->btl_send(ep->btl, ep->btl_endpoint, des, MCA_BTL_TAG_PML);
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
if(rc != OMPI_SUCCESS) {
ep->btl_free(ep->btl,des);
mca_bml_base_free(bml_btl, des);
goto retry;
}
return;
@ -220,7 +222,6 @@ static void mca_pml_ob1_recv_request_ack(
/* queue request to retry later */
retry:
MCA_PML_OB1_RECV_FRAG_ALLOC(frag,rc);
frag->btl = NULL;
frag->hdr.hdr_rndv = *hdr;
frag->num_segments = 0;
frag->request = recvreq;
@ -235,7 +236,6 @@ retry:
void mca_pml_ob1_recv_request_progress(
mca_pml_ob1_recv_request_t* recvreq,
mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments,
size_t num_segments)
{
@ -350,18 +350,19 @@ void mca_pml_ob1_recv_request_progress(
void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
{
if(OPAL_THREAD_ADD32(&recvreq->req_lock,1) == 1) {
mca_pml_ob1_proc_t* proc = recvreq->req_proc;
size_t num_btl_avail = mca_pml_ob1_ep_array_get_size(&proc->btl_rdma);
ompi_proc_t* proc = (ompi_proc_t*) recvreq->req_proc;
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
mca_bml_base_btl_t* bml_btl;
do {
size_t bytes_remaining = recvreq->req_recv.req_bytes_packed - recvreq->req_rdma_offset;
while(bytes_remaining > 0 && recvreq->req_pipeline_depth < mca_pml_ob1.recv_pipeline_depth) {
mca_pml_ob1_endpoint_t* ep;
size_t hdr_size;
size_t size;
mca_pml_ob1_rdma_hdr_t* hdr;
mca_btl_base_descriptor_t* dst;
mca_btl_base_descriptor_t* ctl;
mca_mpool_base_registration_t * reg = NULL;
size_t num_btl_avail = bml_endpoint->btl_rdma.arr_size;
int rc;
/*
@ -370,12 +371,12 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
* available RDMA nics.
*/
if(recvreq->req_mpool == NULL && !mca_pml_ob1.leave_pinned) {
ep = mca_pml_ob1_ep_array_get_next(&proc->btl_rdma);
bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
/* if there is only one btl available or the size is less than
* than the min fragment size, schedule the rest via this btl
*/
if(num_btl_avail == 1 || bytes_remaining < ep->btl_min_rdma_size) {
if(num_btl_avail == 1 || bytes_remaining < bml_btl->btl_min_rdma_size) {
size = bytes_remaining;
/* otherwise attempt to give the BTL a percentage of the message
@ -384,27 +385,27 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
* previously assigned)
*/
} else {
size = (ep->btl_weight * bytes_remaining) / 100;
size = (bml_btl->btl_weight * bytes_remaining) / 100;
}
/* makes sure that we don't exceed BTL max rdma size */
if (ep->btl_max_rdma_size != 0 && size > ep->btl_max_rdma_size) {
size = ep->btl_max_rdma_size;
if (mca_bml.bml_max_rdma_size != 0 && size > mca_bml.bml_max_rdma_size) {
size = bml_btl->btl_max_rdma_size;
}
/*
* For now schedule entire message across a single NIC - need to FIX
*/
/*
* For now schedule entire message across a single NIC - need to FIX
*/
} else {
size = bytes_remaining;
if(NULL != recvreq->req_mpool){
/* find the endpoint corresponding to this btl and schedule the entire message */
ep = mca_pml_ob1_ep_array_find(&proc->btl_rdma,
(mca_btl_base_module_t*) recvreq->req_mpool->user_data);
bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma,
(mca_btl_base_module_t*) recvreq->req_mpool->user_data);
reg = recvreq->req_mpool->mpool_registration;
}
else{
ep = mca_pml_ob1_ep_array_get_next(&proc->btl_rdma);
bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
}
}
@ -413,13 +414,13 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
#if MCA_PML_OB1_TIMESTAMPS
recvreq->pin1[recvreq->pin_index] = get_profiler_timestamp();
#endif
dst = ep->btl_prepare_dst(
ep->btl,
ep->btl_endpoint,
reg,
&recvreq->req_recv.req_convertor,
0,
&size);
mca_bml_base_prepare_dst(
bml_btl,
reg,
&recvreq->req_recv.req_convertor,
0,
&size,
&dst);
#if MCA_PML_OB1_TIMESTAMPS
recvreq->pin2[recvreq->pin_index] = get_profiler_timestamp();
recvreq->pin_index++;
@ -438,9 +439,9 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
hdr_size += (sizeof(mca_btl_base_segment_t) * (dst->des_dst_cnt-1));
}
MCA_PML_OB1_ENDPOINT_DES_ALLOC(ep, ctl, hdr_size);
MCA_BML_BASE_BTL_DES_ALLOC(bml_btl, ctl, hdr_size);
if(ctl == NULL) {
ep->btl_free(ep->btl,dst);
mca_bml_base_free(bml_btl,dst);
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
opal_list_append(&mca_pml_ob1.recv_pending, (opal_list_item_t*)recvreq);
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
@ -448,7 +449,7 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
}
ctl->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
ctl->des_cbfunc = mca_pml_ob1_send_ctl_complete;
ctl->des_cbdata = ep;
ctl->des_cbdata = bml_btl;
/* fill in rdma header */
hdr = (mca_pml_ob1_rdma_hdr_t*)ctl->des_src->seg_addr.pval;
@ -465,12 +466,12 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,1);
/* send rdma request to peer */
rc = ep->btl_send(ep->btl, ep->btl_endpoint, ctl, MCA_BTL_TAG_PML);
rc = mca_bml_base_send(bml_btl, ctl, MCA_BTL_TAG_PML);
if(rc == OMPI_SUCCESS) {
bytes_remaining -= size;
} else {
ep->btl_free(ep->btl,ctl);
ep->btl_free(ep->btl,dst);
mca_bml_base_free(bml_btl,ctl);
mca_bml_base_free(bml_btl,dst);
recvreq->req_rdma_offset -= size;
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
@ -507,20 +508,10 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
(frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
OPAL_THREAD_UNLOCK(&comm->matching_lock);
mca_pml_ob1_recv_request_progress(request,frag->segments,frag->num_segments);
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);
MCA_PML_OB1_RECV_FRAG_RETURN(frag);
} else {
/* mark probe as complete */
OPAL_THREAD_LOCK(&ompi_request_lock);
request->req_recv.req_base.req_ompi.req_status._count = frag->hdr.hdr_match.hdr_msg_length;
request->req_recv.req_base.req_pml_complete = true;
request->req_recv.req_base.req_ompi.req_complete = true;
if(ompi_request_waiting) {
opal_condition_broadcast(&ompi_request_cond);
}
OPAL_THREAD_UNLOCK(&ompi_request_lock);
}
return; /* match found */
}
@ -571,20 +562,10 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
if ((frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
OPAL_THREAD_UNLOCK(&comm->matching_lock);
mca_pml_ob1_recv_request_progress(request,frag->segments,frag->num_segments);
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);
MCA_PML_OB1_RECV_FRAG_RETURN(frag);
} else {
/* mark probe as complete */
OPAL_THREAD_LOCK(&ompi_request_lock);
request->req_recv.req_base.req_ompi.req_status._count = frag->hdr.hdr_match.hdr_msg_length;
request->req_recv.req_base.req_pml_complete = true;
request->req_recv.req_base.req_ompi.req_complete = true;
if(ompi_request_waiting) {
opal_condition_broadcast(&ompi_request_cond);
}
OPAL_THREAD_UNLOCK(&ompi_request_lock);
}
return; /* match found */
}

Просмотреть файл

@ -37,7 +37,7 @@ typedef struct mca_pml_ob1_registration_t mca_pml_ob1_registration_t;
struct mca_pml_ob1_recv_request_t {
mca_pml_base_recv_request_t req_recv;
struct mca_pml_proc_t *req_proc;
struct ompi_proc_t *req_proc;
struct mca_mpool_base_chunk_t* req_chunk;
struct mca_mpool_base_reg_mpool_t* req_mpool;
ompi_ptr_t req_send;
@ -216,7 +216,7 @@ do {
ompi_comm_peer_lookup( \
(request)->req_recv.req_base.req_comm, (hdr)->hdr_src); \
\
(request)->req_proc = proc->proc_pml; \
(request)->req_proc = proc; \
ompi_convertor_copy_and_prepare_for_recv( proc->proc_convertor, \
(request)->req_recv.req_base.req_datatype, \
(request)->req_recv.req_base.req_count, \
@ -280,7 +280,6 @@ do {
void mca_pml_ob1_recv_request_progress(
mca_pml_ob1_recv_request_t* req,
mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments,
size_t num_segments);

Просмотреть файл

@ -31,7 +31,7 @@
#include "pml_ob1_rdmafrag.h"
#include "pml_ob1_recvreq.h"
#include "pml_ob1_endpoint.h"
#include "mca/bml/base/base.h"
static int mca_pml_ob1_send_request_fini(struct ompi_request_t** request)
@ -76,13 +76,13 @@ OBJ_CLASS_INSTANCE(
*/
static void mca_pml_ob1_match_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status)
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
mca_pml_ob1_endpoint_t* btl_ep = sendreq->req_endpoint;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
/* check completion status */
if(OMPI_SUCCESS != status) {
@ -92,7 +92,7 @@ static void mca_pml_ob1_match_completion(
}
/* attempt to cache the descriptor */
MCA_PML_OB1_ENDPOINT_DES_RETURN(btl_ep,descriptor);
MCA_BML_BASE_BTL_DES_RETURN( bml_btl , descriptor );
/* signal request completion */
OPAL_THREAD_LOCK(&ompi_request_lock);
@ -112,8 +112,7 @@ static void mca_pml_ob1_rndv_completion(
int status)
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
mca_pml_ob1_endpoint_t* btl_ep = sendreq->req_endpoint;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
/* check completion status */
if(OMPI_SUCCESS != status) {
/* TSW - FIX */
@ -131,7 +130,7 @@ static void mca_pml_ob1_rndv_completion(
#endif
/* return the descriptor */
btl_ep->btl_free(btl_ep->btl, descriptor);
mca_bml_base_free(bml_btl, descriptor);
/* update pipeline depth */
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,-1);
@ -156,7 +155,7 @@ static void mca_pml_ob1_frag_completion(
int status)
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
mca_pml_ob1_endpoint_t* btl_ep = sendreq->req_endpoint;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
bool schedule;
/* check completion status */
@ -176,7 +175,7 @@ static void mca_pml_ob1_frag_completion(
#endif
/* return the descriptor */
btl_ep->btl_free(btl_ep->btl, descriptor);
mca_bml_base_free(bml_btl, descriptor);
/* check for request completion */
OPAL_THREAD_LOCK(&ompi_request_lock);
@ -203,20 +202,28 @@ static void mca_pml_ob1_frag_completion(
*/
int mca_pml_ob1_send_request_start(
mca_pml_ob1_send_request_t* sendreq,
mca_pml_ob1_endpoint_t* endpoint)
mca_pml_ob1_send_request_t* sendreq
)
{
mca_btl_base_descriptor_t* descriptor;
mca_btl_base_segment_t* segment;
mca_pml_ob1_hdr_t* hdr;
mca_bml_base_endpoint_t* bml_endpoint = sendreq->bml_endpoint;
if(NULL == bml_endpoint) {
opal_output("[%s:%d:%s] no endpoint found for given destination.\n", __FILE__, __LINE__, __func__);
}
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager);
size_t size = sendreq->req_send.req_bytes_packed;
int rc;
/* shortcut for zero byte */
if(size == 0 && sendreq->req_send.req_send_mode != MCA_PML_BASE_SEND_SYNCHRONOUS) {
/* allocate a descriptor */
MCA_PML_OB1_ENDPOINT_DES_ALLOC(endpoint, descriptor, sizeof(mca_pml_ob1_match_hdr_t));
MCA_BML_BASE_BTL_DES_ALLOC(bml_btl, descriptor, sizeof(mca_pml_ob1_match_hdr_t));
if(NULL == descriptor) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -247,8 +254,8 @@ int mca_pml_ob1_send_request_start(
bool ack = false;
/* determine first fragment size */
if(size > endpoint->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t)) {
size = endpoint->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t);
if(size > bml_btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t)) {
size = bml_btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t);
ack = true;
} else if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
ack = true;
@ -259,7 +266,8 @@ int mca_pml_ob1_send_request_start(
int32_t free_after;
/* allocate descriptor */
descriptor = endpoint->btl_alloc(endpoint->btl, sizeof(mca_pml_ob1_match_hdr_t) + size);
mca_bml_base_alloc(bml_btl, &descriptor, sizeof(mca_pml_ob1_match_hdr_t) + size);
if(NULL == descriptor) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -276,7 +284,7 @@ int mca_pml_ob1_send_request_start(
&iov_count,
&max_data,
&free_after)) < 0) {
endpoint->btl_free(endpoint->btl, descriptor);
mca_bml_base_free(bml_btl, descriptor);
return rc;
}
@ -307,7 +315,7 @@ int mca_pml_ob1_send_request_start(
int32_t free_after;
/* allocate space for hdr + first fragment */
descriptor = endpoint->btl_alloc(endpoint->btl, sizeof(mca_pml_ob1_rendezvous_hdr_t) + size);
mca_bml_base_alloc(bml_btl, &descriptor, sizeof(mca_pml_ob1_rendezvous_hdr_t) + size);
if(NULL == descriptor) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -332,7 +340,7 @@ int mca_pml_ob1_send_request_start(
&iov_count,
&max_data,
&free_after)) < 0) {
endpoint->btl_free(endpoint->btl, descriptor);
mca_bml_base_free(bml_btl , descriptor);
return rc;
}
if(max_data != size) {
@ -374,13 +382,11 @@ int mca_pml_ob1_send_request_start(
#if MCA_PML_OB1_TIMESTAMPS
sendreq->t_start = get_profiler_timestamp();
#endif
rc = endpoint->btl_send(
endpoint->btl,
endpoint->btl_endpoint,
descriptor,
MCA_BTL_TAG_PML);
rc = mca_bml_base_send(bml_btl,
descriptor,
MCA_BTL_TAG_PML);
if(OMPI_SUCCESS != rc) {
endpoint->btl_free(endpoint->btl,descriptor);
mca_bml_base_free(bml_btl, descriptor );
}
return rc;
}
@ -398,25 +404,25 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
* of the number of times the routine has been called and run through
* the scheduling logic once for every call.
*/
mca_bml_base_endpoint_t* bml_endpoint = sendreq->bml_endpoint;
if(OPAL_THREAD_ADD32(&sendreq->req_lock,1) == 1) {
mca_pml_ob1_proc_t* proc = sendreq->req_proc;
size_t num_btl_avail = mca_pml_ob1_ep_array_get_size(&proc->btl_send);
do {
/* allocate remaining bytes to BTLs */
size_t bytes_remaining = sendreq->req_rdma_offset - sendreq->req_send_offset;
while(bytes_remaining > 0 &&
(sendreq->req_pipeline_depth < mca_pml_ob1.send_pipeline_depth ||
sendreq->req_rdma_offset < sendreq->req_send.req_bytes_packed)) {
mca_pml_ob1_endpoint_t* ep = mca_pml_ob1_ep_array_get_next(&proc->btl_send);
mca_pml_ob1_frag_hdr_t* hdr;
mca_btl_base_descriptor_t* des;
int rc;
/* if there is only one btl available or the size is less than
* than the min fragment size, schedule the rest via this btl
*/
size_t size;
if(num_btl_avail == 1 || bytes_remaining < ep->btl_min_send_size) {
size_t size;
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_send);
size_t num_btl_avail = bml_endpoint->btl_rdma.arr_size;
if(num_btl_avail == 1 || bytes_remaining < bml_btl->btl_min_send_size) {
size = bytes_remaining;
/* otherwise attempt to give the BTL a percentage of the message
@ -425,25 +431,29 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
* previously assigned)
*/
} else {
size = (ep->btl_weight * bytes_remaining) / 100;
size = (bml_btl->btl_weight * bytes_remaining) / 100;
}
/* makes sure that we don't exceed BTL max send size */
if (ep->btl_max_send_size != 0 &&
size > ep->btl_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t)) {
size = ep->btl_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t);
if (bml_btl->btl_max_send_size != 0 &&
size > bml_btl->btl_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t)) {
size = bml_btl->btl_max_send_size - sizeof(mca_pml_ob1_frag_hdr_t);
}
/* pack into a descriptor */
ompi_convertor_set_position(&sendreq->req_send.req_convertor,
&sendreq->req_send_offset);
des = ep->btl_prepare_src(
ep->btl,
ep->btl_endpoint,
NULL,
&sendreq->req_send.req_convertor,
sizeof(mca_pml_ob1_frag_hdr_t),
&size);
&sendreq->req_send_offset);
mca_bml_base_prepare_src(
bml_btl,
NULL,
&sendreq->req_send.req_convertor,
sizeof(mca_pml_ob1_frag_hdr_t),
&size,
&des
);
if(des == NULL) {
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
opal_list_append(&mca_pml_ob1.send_pending, (opal_list_item_t*)sendreq);
@ -467,13 +477,14 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,1);
/* initiate send - note that this may complete before the call returns */
rc = ep->btl_send(ep->btl, ep->btl_endpoint, des, MCA_BTL_TAG_PML);
rc = mca_bml_base_send( bml_btl, des, MCA_BTL_TAG_PML);
if(rc == OMPI_SUCCESS) {
bytes_remaining -= size;
} else {
sendreq->req_send_offset -= size;
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,-1);
ep->btl_free(ep->btl,des);
mca_bml_base_free(bml_btl,des);
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
opal_list_append(&mca_pml_ob1.send_pending, (opal_list_item_t*)sendreq);
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
@ -483,7 +494,7 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
if(bytes_remaining == 0)
sendreq->t_scheduled = get_profiler_timestamp();
#endif
mca_pml_ob1_progress();
mca_pml_ob1_progress();
}
} while (OPAL_THREAD_ADD32(&sendreq->req_lock,-1) > 0);
}
@ -503,9 +514,10 @@ static void mca_pml_ob1_fin_completion(
{
mca_pml_ob1_rdma_frag_t* frag = (mca_pml_ob1_rdma_frag_t*)des->des_cbdata;
mca_pml_ob1_endpoint_t* endpoint = frag->rdma_ep;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
MCA_PML_OB1_ENDPOINT_DES_RETURN(endpoint, des);
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des);
}
/**
@ -524,6 +536,7 @@ static void mca_pml_ob1_put_completion(
mca_pml_ob1_send_request_t* sendreq = frag->rdma_req;
mca_btl_base_descriptor_t* fin;
mca_pml_ob1_fin_hdr_t* hdr;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
int rc;
/* check completion status */
@ -554,7 +567,7 @@ static void mca_pml_ob1_put_completion(
*/
frag->rdma_state = MCA_PML_OB1_RDMA_FIN;
MCA_PML_OB1_ENDPOINT_DES_ALLOC(frag->rdma_ep, fin, sizeof(mca_pml_ob1_fin_hdr_t));
MCA_BML_BASE_BTL_DES_ALLOC(bml_btl, fin, sizeof(mca_pml_ob1_fin_hdr_t));
if(NULL == fin) {
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
@ -575,13 +588,13 @@ static void mca_pml_ob1_put_completion(
hdr->hdr_rdma_length = frag->rdma_length;
/* queue request */
rc = btl->btl_send(
btl,
ep,
fin,
MCA_BTL_TAG_PML);
rc = mca_bml_base_send(
bml_btl,
fin,
MCA_BTL_TAG_PML
);
if(OMPI_SUCCESS != rc) {
btl->btl_free(btl, fin);
mca_bml_base_free(bml_btl, fin);
if(rc == OMPI_ERR_OUT_OF_RESOURCE) {
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
@ -599,7 +612,7 @@ cleanup:
*/
des->des_dst = NULL;
des->des_dst_cnt = 0;
btl->btl_free(btl, des);
mca_bml_base_free(bml_btl, des);
}
@ -612,19 +625,20 @@ cleanup:
*/
void mca_pml_ob1_send_request_put(
mca_pml_ob1_send_request_t* sendreq,
mca_btl_base_module_t* btl,
mca_pml_ob1_rdma_hdr_t* hdr)
mca_pml_ob1_send_request_t* sendreq,
mca_btl_base_module_t* btl,
mca_pml_ob1_rdma_hdr_t* hdr)
{
mca_pml_ob1_proc_t* proc = sendreq->req_proc;
mca_pml_ob1_endpoint_t* ep = mca_pml_ob1_ep_array_find(&proc->btl_rdma,btl);
ompi_proc_t* proc = sendreq->req_proc;
mca_mpool_base_registration_t* reg = NULL;
mca_bml_base_btl_t* bml_btl;
mca_btl_base_descriptor_t* des;
mca_pml_ob1_rdma_frag_t* frag;
size_t offset = hdr->hdr_rdma_offset;
size_t i, size = 0;
int rc;
bml_btl = mca_bml_base_btl_array_find(&sendreq->bml_endpoint->btl_rdma, btl);
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag, rc);
if(NULL == frag) {
/* TSW - FIX */
@ -639,7 +653,7 @@ void mca_pml_ob1_send_request_put(
}
frag->rdma_hdr.hdr_rdma = *hdr;
frag->rdma_req = sendreq;
frag->rdma_ep = ep;
frag->rdma_ep = sendreq->bml_endpoint;
frag->rdma_state = MCA_PML_OB1_RDMA_PREPARE;
/* look for a prior registration on this interface */
@ -662,13 +676,15 @@ void mca_pml_ob1_send_request_put(
/* setup descriptor */
ompi_convertor_set_position(&sendreq->req_send.req_convertor, &offset);
des = btl->btl_prepare_src(
btl,
ep->btl_endpoint,
reg,
&sendreq->req_send.req_convertor,
0,
&size);
mca_bml_base_prepare_src(
bml_btl,
reg,
&sendreq->req_send.req_convertor,
0,
&size,
&des);
if(NULL == des) {
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
@ -688,8 +704,8 @@ void mca_pml_ob1_send_request_put(
if(sendreq->t_put_index >= MCA_PML_OB1_NUM_TSTAMPS)
sendreq->t_put_index = 0;
#endif
if(OMPI_SUCCESS != (rc = btl->btl_put(btl, ep->btl_endpoint, des))) {
if(OMPI_SUCCESS != (rc = mca_bml_base_put(bml_btl, des))) {
if(rc == OMPI_ERR_OUT_OF_RESOURCE) {
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);

Просмотреть файл

@ -26,6 +26,7 @@
#include "pml_ob1_comm.h"
#include "pml_ob1_hdr.h"
#include "datatype/convertor.h"
#include "mca/bml/bml.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
@ -34,8 +35,8 @@ extern "C" {
struct mca_pml_ob1_send_request_t {
mca_pml_base_send_request_t req_send;
mca_pml_ob1_proc_t* req_proc;
mca_pml_ob1_endpoint_t* req_endpoint;
ompi_proc_t* req_proc;
mca_bml_base_endpoint_t* bml_endpoint;
volatile int32_t req_state;
struct mca_mpool_base_chunk_t* req_chunk;
ompi_ptr_t req_recv;
@ -70,7 +71,8 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
sendreq, \
rc) \
{ \
mca_pml_ob1_proc_t *proc = comm->c_pml_procs[dst]; \
ompi_proc_t *proc = \
comm->c_pml_procs[dst]->proc_ompi; \
opal_list_item_t* item; \
\
if(NULL == proc) { \
@ -159,14 +161,9 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
#define MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc) \
{ \
mca_pml_ob1_endpoint_t* endpoint; \
mca_pml_ob1_proc_t* proc = sendreq->req_proc; \
mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \
\
MCA_PML_OB1_SEND_REQUEST_TSTAMPS_INIT(sendreq); \
\
/* select next endpoint */ \
endpoint = mca_pml_ob1_ep_array_get_next(&proc->btl_eager); \
sendreq->req_lock = 0; \
sendreq->req_pipeline_depth = 0; \
sendreq->req_bytes_delivered = 0; \
@ -178,13 +175,13 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
sendreq->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32( \
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); \
sendreq->req_endpoint = endpoint; \
sendreq->bml_endpoint = (mca_bml_base_endpoint_t*) sendreq->req_proc->proc_pml; \
\
/* handle buffered send */ \
if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \
mca_pml_base_bsend_request_start(&sendreq->req_send.req_base.req_ompi); \
} \
rc = mca_pml_ob1_send_request_start(sendreq, endpoint); \
rc = mca_pml_ob1_send_request_start( sendreq ); \
}
@ -321,8 +318,8 @@ do { \
*/
int mca_pml_ob1_send_request_start(
mca_pml_ob1_send_request_t* sendreq,
mca_pml_ob1_endpoint_t* endpoint);
mca_pml_ob1_send_request_t* sendreq
);
/**
* Schedule additional fragments

Просмотреть файл

@ -76,6 +76,14 @@ extern "C" {
struct mca_ptl_base_modulet;
struct mca_ptl_addr_t;
struct mca_pml_proc_t {
opal_list_item_t super;
ompi_proc_t *proc_ompi; /**< back-pointer to ompi_proc_t */
opal_mutex_t proc_lock; /**< lock to protect against concurrent access */
int proc_flags; /**< prefered method of accessing this peer */
};
typedef struct mca_pml_proc_t mca_pml_proc_t;
typedef enum {
MCA_PML_BASE_SEND_STANDARD,

Просмотреть файл

@ -35,7 +35,7 @@ extern opal_class_t mca_pml_teg_ptl_array_t_class;
struct mca_ptl_proc_t {
int ptl_weight; /**< PTL weight for scheduling */
struct mca_ptl_base_peer_t* ptl_peer; /**< PTL addressing info */
struct mca_pml_teg_ptl_t* ptl_base; /**< PML specific PTL info */
struct mca_pml_base_ptl_t* ptl_base; /**< PML specific PTL info */
mca_ptl_base_module_t *ptl; /**< PTL module */
};
typedef struct mca_ptl_proc_t mca_ptl_proc_t;

Просмотреть файл

@ -52,9 +52,7 @@ mca_pml_teg_t mca_pml_teg = {
mca_pml_teg_send,
mca_pml_teg_iprobe,
mca_pml_teg_probe,
mca_pml_teg_start,
32768,
(0x7fffffff)
mca_pml_teg_start
}
};
@ -172,7 +170,8 @@ static int mca_pml_teg_add_ptls(void)
int mca_pml_teg_enable(bool enable)
{
size_t i=0;
int value = enable;
int value = enable, rc;
uint32_t proc_arch;
/* If I'm not selected then prepare for close */
if( false == enable ) return OMPI_SUCCESS;
@ -186,6 +185,13 @@ int mca_pml_teg_enable(bool enable)
mca_pml_teg.teg_free_list_inc,
NULL );
/* I get selected. Publish my information */
proc_arch = ompi_proc_local()->proc_arch;
proc_arch = htonl(proc_arch);
rc = mca_pml_base_modex_send(&mca_pml_teg_component.pmlm_version, &proc_arch, sizeof(proc_arch));
if(rc != OMPI_SUCCESS)
return rc;
/* Grab all the PTLs and prepare them */
mca_pml_teg_add_ptls();
@ -222,6 +228,20 @@ int mca_pml_teg_add_procs(ompi_proc_t** procs, size_t nprocs)
if(OMPI_SUCCESS != rc)
return rc;
/* iterate through each of the procs and set the peers architecture */
for(p=0; p<nprocs; p++) {
uint32_t* proc_arch;
size_t size = sizeof(uint32_t);
rc = mca_pml_base_modex_recv(&mca_pml_teg_component.pmlm_version, procs[p],
(void**)&proc_arch, &size);
if(rc != OMPI_SUCCESS)
return rc;
if(size != sizeof(uint32_t))
return OMPI_ERROR;
procs[p]->proc_arch = ntohl(*proc_arch);
free(proc_arch);
}
/* attempt to add all procs to each ptl */
ptl_peers = (struct mca_ptl_base_peer_t **)malloc(nprocs * sizeof(struct mca_ptl_base_peer_t*));
for(p_index = 0; p_index < mca_pml_teg.teg_num_ptl_modules; p_index++) {
@ -245,7 +265,7 @@ int mca_pml_teg_add_procs(ompi_proc_t** procs, size_t nprocs)
for(p=0; p<nprocs; p++) {
if(ompi_bitmap_is_set_bit(&reachable, p)) {
ompi_proc_t *proc = procs[p];
mca_pml_proc_t* proc_pml = proc->proc_pml;
mca_pml_teg_proc_t* proc_pml = (mca_pml_teg_proc_t*) proc->proc_pml;
mca_ptl_proc_t* proc_ptl;
size_t size;
@ -266,8 +286,8 @@ int mca_pml_teg_add_procs(ompi_proc_t** procs, size_t nprocs)
/* preallocate space in array for max number of ptls */
mca_ptl_array_reserve(&proc_pml->proc_ptl_first, mca_pml_teg.teg_num_ptl_modules);
mca_ptl_array_reserve(&proc_pml->proc_ptl_next, mca_pml_teg.teg_num_ptl_modules);
proc_pml->proc_ompi = proc;
proc->proc_pml = proc_pml;
proc_pml->base.proc_ompi = proc;
proc->proc_pml = (mca_pml_proc_t*) proc_pml;
}
/* dont allow an additional PTL with a lower exclusivity ranking */
@ -312,7 +332,7 @@ int mca_pml_teg_add_procs(ompi_proc_t** procs, size_t nprocs)
/* iterate back through procs and compute metrics for registered ptls */
for(p=0; p<nprocs; p++) {
ompi_proc_t *proc = procs[p];
mca_pml_proc_t* proc_pml = proc->proc_pml;
mca_pml_teg_proc_t* proc_pml = (mca_pml_teg_proc_t*) proc->proc_pml;
double total_bandwidth = 0;
uint32_t latency = 0;
size_t n_index;
@ -342,7 +362,7 @@ int mca_pml_teg_add_procs(ompi_proc_t** procs, size_t nprocs)
for(n_index = 0; n_index < n_size; n_index++) {
struct mca_ptl_proc_t* proc_ptl = mca_ptl_array_get_index(&proc_pml->proc_ptl_next, n_index);
mca_ptl_base_module_t *ptl = proc_ptl->ptl;
struct mca_ptl_base_module_t *ptl = proc_ptl->ptl;
double weight;
/* compute weighting factor for this ptl */
@ -355,17 +375,17 @@ int mca_pml_teg_add_procs(ompi_proc_t** procs, size_t nprocs)
/*
* save/create ptl extension for use by pml
*/
if (NULL == ptl->ptl_base &&
proc_ptl->ptl_base = ptl->ptl_base;
if (NULL == proc_ptl->ptl_base &&
ptl->ptl_cache_bytes > 0 &&
NULL != ptl->ptl_request_init &&
NULL != ptl->ptl_request_fini) {
mca_pml_teg_ptl_t* ptl_base = OBJ_NEW(mca_pml_teg_ptl_t);
mca_pml_base_ptl_t* ptl_base = OBJ_NEW(mca_pml_base_ptl_t);
ptl_base->ptl = ptl;
ptl_base->ptl_cache_size = ptl->ptl_cache_size;
ptl->ptl_base = (struct mca_pml_base_ptl_t*)ptl_base;
proc_ptl->ptl_base = ptl->ptl_base = ptl_base;
}
proc_ptl->ptl_base = (mca_pml_teg_ptl_t*)ptl->ptl_base;
/* check to see if this ptl is already in the array of ptls used for first
* fragments - if not add it.
@ -391,7 +411,7 @@ int mca_pml_teg_del_procs(ompi_proc_t** procs, size_t nprocs)
int rc;
for(p = 0; p < nprocs; p++) {
ompi_proc_t *proc = procs[p];
mca_pml_proc_t* proc_pml = proc->proc_pml;
mca_pml_teg_proc_t* proc_pml = (mca_pml_teg_proc_t*) proc->proc_pml;
size_t f_index, f_size;
size_t n_index, n_size;

Просмотреть файл

@ -105,7 +105,7 @@ int mca_pml_teg_component_open(void)
mca_pml_teg.teg_priority =
mca_pml_teg_param_register_int("priority", 1);
return OMPI_SUCCESS;
return mca_ptl_base_open();
}

Просмотреть файл

@ -22,11 +22,11 @@
#include "pml_ptl_array.h"
static void mca_pml_teg_proc_construct(mca_pml_proc_t* proc)
static void mca_pml_teg_proc_construct(mca_pml_teg_proc_t* proc)
{
proc->proc_ompi = NULL;
proc->base.proc_ompi = NULL;
proc->proc_ptl_flags = 0;
OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);
OBJ_CONSTRUCT(&proc->base.proc_lock, opal_mutex_t);
OBJ_CONSTRUCT(&proc->proc_ptl_first, mca_pml_teg_ptl_array_t);
OBJ_CONSTRUCT(&proc->proc_ptl_next, mca_pml_teg_ptl_array_t);
@ -36,13 +36,13 @@ static void mca_pml_teg_proc_construct(mca_pml_proc_t* proc)
}
static void mca_pml_teg_proc_destruct(mca_pml_proc_t* proc)
static void mca_pml_teg_proc_destruct(mca_pml_teg_proc_t* proc)
{
OPAL_THREAD_LOCK(&mca_pml_teg.teg_lock);
opal_list_remove_item(&mca_pml_teg.teg_procs, (opal_list_item_t*)proc);
OPAL_THREAD_UNLOCK(&mca_pml_teg.teg_lock);
OBJ_DESTRUCT(&proc->proc_lock);
OBJ_DESTRUCT(&proc->base.proc_lock);
OBJ_DESTRUCT(&proc->proc_ptl_first);
OBJ_DESTRUCT(&proc->proc_ptl_next);
}

Просмотреть файл

@ -32,19 +32,16 @@ extern "C" {
* Structure associated w/ ompi_proc_t that contains data specific
* to the PML. Note that this name is not PML specific.
*/
struct mca_pml_proc_t {
opal_list_item_t super;
ompi_proc_t *proc_ompi; /**< back-pointer to ompi_proc_t */
opal_mutex_t proc_lock; /**< lock to protect against concurrent access */
mca_ptl_array_t proc_ptl_first; /**< array of ptls to use for first fragments */
mca_ptl_array_t proc_ptl_next; /**< array of ptls to use for remaining fragments */
uint32_t proc_ptl_flags; /**< aggregate ptl flags */
struct mca_pml_teg_proc_t {
mca_pml_proc_t base;
mca_ptl_array_t proc_ptl_first; /**< array of ptls to use for first fragments */
mca_ptl_array_t proc_ptl_next; /**< array of ptls to use for remaining fragments */
uint32_t proc_ptl_flags; /**< aggregate ptl flags */
};
typedef struct mca_pml_proc_t mca_pml_proc_t;
typedef struct mca_pml_teg_proc_t mca_pml_teg_proc_t;
OMPI_COMP_EXPORT extern opal_class_t mca_pml_teg_proc_t_class;
typedef struct mca_pml_proc_t mca_pml_teg_proc_t;
/**
* Return the mca_pml_proc_t instance cached in the communicators local group.
@ -88,7 +85,7 @@ static inline struct mca_ptl_base_peer_t* mca_pml_teg_proc_lookup_remote_peer(
struct mca_ptl_base_module_t* ptl)
{
ompi_proc_t* proc = comm->c_remote_group->grp_proc_pointers[rank];
mca_pml_proc_t* proc_pml = proc->proc_pml;
mca_pml_teg_proc_t* proc_pml =(mca_pml_teg_proc_t*) proc->proc_pml;
size_t i, size = mca_ptl_array_get_size(&proc_pml->proc_ptl_first);
mca_ptl_proc_t* proc_ptl = proc_pml->proc_ptl_first.ptl_procs;
for(i = 0; i < size; i++) {

Просмотреть файл

@ -19,7 +19,7 @@
#include "pml_teg_ptl.h"
static void mca_pml_teg_ptl_construct(mca_pml_teg_ptl_t* ptl)
static void mca_pml_base_ptl_construct(mca_pml_base_ptl_t* ptl)
{
OBJ_CONSTRUCT(&ptl->ptl_cache, opal_list_t);
OBJ_CONSTRUCT(&ptl->ptl_cache_lock, opal_mutex_t);
@ -28,16 +28,16 @@ static void mca_pml_teg_ptl_construct(mca_pml_teg_ptl_t* ptl)
ptl->ptl_cache_alloc = 0;
}
static void mca_pml_teg_ptl_destruct(mca_pml_teg_ptl_t* ptl)
static void mca_pml_base_ptl_destruct(mca_pml_base_ptl_t* ptl)
{
OBJ_DESTRUCT(&ptl->ptl_cache);
OBJ_DESTRUCT(&ptl->ptl_cache_lock);
}
OBJ_CLASS_INSTANCE(
mca_pml_teg_ptl_t,
mca_pml_base_ptl_t,
opal_list_t,
mca_pml_teg_ptl_construct,
mca_pml_teg_ptl_destruct
mca_pml_base_ptl_construct,
mca_pml_base_ptl_destruct
);

Просмотреть файл

@ -25,16 +25,16 @@ extern "C" {
#endif
struct mca_pml_teg_ptl_t {
struct mca_pml_base_ptl_t {
opal_list_t ptl_cache; /**< cache of send requests */
size_t ptl_cache_size; /**< maximum size of cache */
size_t ptl_cache_alloc; /**< current number of allocated items */
opal_mutex_t ptl_cache_lock; /**< lock for queue access */
struct mca_ptl_base_module_t* ptl; /**< back pointer to ptl */
};
typedef struct mca_pml_teg_ptl_t mca_pml_teg_ptl_t;
typedef struct mca_pml_base_ptl_t mca_pml_base_ptl_t;
OBJ_CLASS_DECLARATION(mca_pml_teg_ptl_t);
OBJ_CLASS_DECLARATION(mca_pml_base_ptl_t);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -80,7 +80,7 @@ OBJ_CLASS_INSTANCE(
int mca_pml_teg_send_request_schedule(mca_ptl_base_send_request_t* req)
{
ompi_proc_t *proc;
mca_pml_proc_t* proc_pml;
mca_pml_teg_proc_t* proc_pml;
int send_count = 0;
size_t bytes_remaining;
size_t num_ptl_avail;
@ -94,7 +94,7 @@ int mca_pml_teg_send_request_schedule(mca_ptl_base_send_request_t* req)
*/
if(OPAL_THREAD_ADD32(&req->req_lock,1) == 1) {
proc = ompi_comm_peer_lookup(req->req_send.req_base.req_comm, req->req_send.req_base.req_peer);
proc_pml = proc->proc_pml;
proc_pml = (mca_pml_teg_proc_t*) proc->proc_pml;
do {
/* allocate remaining bytes to PTLs */
bytes_remaining = req->req_send.req_bytes_packed - req->req_offset;

Просмотреть файл

@ -41,9 +41,10 @@ OBJ_CLASS_DECLARATION(mca_pml_teg_send_request_t);
sendreq, \
rc) \
{ \
mca_pml_proc_t *proc = mca_pml_teg_proc_lookup_remote(comm,dst); \
mca_pml_teg_proc_t *proc = \
(mca_pml_teg_proc_t*) mca_pml_teg_proc_lookup_remote(comm,dst); \
mca_ptl_proc_t* ptl_proc; \
mca_pml_teg_ptl_t* ptl_base; \
mca_pml_base_ptl_t* ptl_base; \
\
if(NULL == proc) { \
return OMPI_ERR_OUT_OF_RESOURCE; \
@ -125,7 +126,7 @@ OBJ_CLASS_DECLARATION(mca_pml_teg_send_request_t);
#define MCA_PML_TEG_SEND_REQUEST_RETURN(sendreq) \
{ \
mca_ptl_base_module_t* ptl = (sendreq)->req_ptl; \
mca_pml_teg_ptl_t* ptl_base = (mca_pml_teg_ptl_t*)ptl->ptl_base; \
mca_pml_base_ptl_t* ptl_base = ptl->ptl_base; \
\
/* Let the base handle the reference counts */ \
MCA_PML_BASE_SEND_REQUEST_FINI((&sendreq->req_send)); \

Просмотреть файл

@ -53,9 +53,7 @@ mca_pml_uniq_t mca_pml_uniq = {
mca_pml_uniq_send,
mca_pml_uniq_iprobe,
mca_pml_uniq_probe,
mca_pml_uniq_start,
32768,
(0x7fffffff)
mca_pml_uniq_start
}
};
@ -172,7 +170,8 @@ static int mca_pml_uniq_add_ptls( void )
int mca_pml_uniq_enable( bool enable )
{
size_t i;
int value = enable;
int value = enable, rc;
uint32_t proc_arch;
/* If I'm not selected then prepare for close */
if( false == enable ) return OMPI_SUCCESS;
@ -186,6 +185,14 @@ int mca_pml_uniq_enable( bool enable )
mca_pml_uniq.uniq_free_list_inc,
NULL );
/* I get selected. Publish my informations */
proc_arch = ompi_proc_local()->proc_arch;
proc_arch = htonl(proc_arch);
rc = mca_pml_base_modex_send(&mca_pml_uniq_component.pmlm_version, &proc_arch, sizeof(proc_arch));
if( rc != OMPI_SUCCESS )
return rc;
/* Grab all the PTLs and prepare them */
mca_pml_uniq_add_ptls();
@ -221,6 +228,20 @@ int mca_pml_uniq_add_procs(ompi_proc_t** procs, size_t nprocs)
if( OMPI_SUCCESS != rc )
return rc;
/* iterate through each of the procs and set the peers architecture */
for( p = 0; p < nprocs; p++ ) {
uint32_t* proc_arch;
size_t size = sizeof(uint32_t);
rc = mca_pml_base_modex_recv(&mca_pml_uniq_component.pmlm_version, procs[p],
(void**)&proc_arch, &size);
if(rc != OMPI_SUCCESS)
return rc;
if(size != sizeof(uint32_t))
return OMPI_ERROR;
procs[p]->proc_arch = ntohl(*proc_arch);
free(proc_arch);
}
/* attempt to add all procs to each ptl */
ptl_peers = (struct mca_ptl_base_peer_t **)malloc(nprocs * sizeof(struct mca_ptl_base_peer_t*));
for( p_index = 0; p_index < mca_pml_uniq.uniq_num_ptl_modules; p_index++ ) {
@ -243,12 +264,12 @@ int mca_pml_uniq_add_procs(ompi_proc_t** procs, size_t nprocs)
/* for each proc that is reachable - add the ptl to the procs array(s) */
for( p = 0; p < nprocs; p++) {
ompi_proc_t *proc;
mca_pml_proc_t* proc_pml;
mca_pml_uniq_proc_t* proc_pml;
if( !ompi_bitmap_is_set_bit(&reachable, p) ) continue;
proc = procs[p];
proc_pml = proc->proc_pml;
proc_pml = (mca_pml_uniq_proc_t*) proc->proc_pml;
/* this ptl can be used */
ptl_inuse++;
@ -264,8 +285,8 @@ int mca_pml_uniq_add_procs(ompi_proc_t** procs, size_t nprocs)
return OMPI_ERR_OUT_OF_RESOURCE;
}
proc_pml->proc_ompi = proc;
proc->proc_pml = proc_pml;
proc_pml->base.proc_ompi = proc;
proc->proc_pml = (mca_pml_proc_t*) proc_pml;
/* it's the first PTL so add it to both first and next */
proc_pml->proc_ptl_flags |= ptl->ptl_flags;
if (NULL == ptl->ptl_base &&
@ -273,10 +294,10 @@ int mca_pml_uniq_add_procs(ompi_proc_t** procs, size_t nprocs)
NULL != ptl->ptl_request_init &&
NULL != ptl->ptl_request_fini) {
mca_pml_uniq_ptl_t* ptl_base = OBJ_NEW(mca_pml_uniq_ptl_t);
mca_pml_base_ptl_t* ptl_base = OBJ_NEW(mca_pml_base_ptl_t);
ptl_base->ptl = ptl;
ptl_base->ptl_cache_size = ptl->ptl_cache_size;
ptl->ptl_base = (struct mca_pml_base_ptl_t*)ptl_base;
ptl->ptl_base = ptl_base;
}
proc_pml->proc_ptl_first.ptl_base = ptl->ptl_base;
proc_pml->proc_ptl_first.ptl_peer = ptl_peers[p];
@ -339,7 +360,7 @@ int mca_pml_uniq_del_procs(ompi_proc_t** procs, size_t nprocs)
int rc;
for(p = 0; p < nprocs; p++) {
ompi_proc_t *proc = procs[p];
mca_pml_proc_t* proc_pml = proc->proc_pml;
mca_pml_uniq_proc_t* proc_pml = (mca_pml_uniq_proc_t*) proc->proc_pml;
mca_ptl_proc_t* ptl_proc;
mca_ptl_base_module_t* ptl;

Просмотреть файл

@ -105,7 +105,7 @@ int mca_pml_uniq_component_open(void)
mca_pml_uniq.uniq_priority =
mca_pml_uniq_param_register_int("priority", 0);
return OMPI_SUCCESS;
return mca_ptl_base_open();
}

Просмотреть файл

@ -21,11 +21,11 @@
#include "pml_uniq_proc.h"
static void mca_pml_uniq_proc_construct(mca_pml_proc_t* proc)
static void mca_pml_uniq_proc_construct(mca_pml_uniq_proc_t* proc)
{
proc->proc_ompi = NULL;
proc->base.proc_ompi = NULL;
proc->proc_ptl_flags = 0;
OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);
OBJ_CONSTRUCT(&proc->base.proc_lock, opal_mutex_t);
proc->proc_ptl_first.ptl_peer = NULL;
proc->proc_ptl_first.ptl_base = NULL;
@ -41,13 +41,13 @@ static void mca_pml_uniq_proc_construct(mca_pml_proc_t* proc)
}
static void mca_pml_uniq_proc_destruct(mca_pml_proc_t* proc)
static void mca_pml_uniq_proc_destruct(mca_pml_uniq_proc_t* proc)
{
OPAL_THREAD_LOCK(&mca_pml_uniq.uniq_lock);
opal_list_remove_item(&mca_pml_uniq.uniq_procs, (opal_list_item_t*)proc);
OPAL_THREAD_UNLOCK(&mca_pml_uniq.uniq_lock);
OBJ_DESTRUCT(&proc->proc_lock);
OBJ_DESTRUCT(&proc->base.proc_lock);
}
OBJ_CLASS_INSTANCE(

Просмотреть файл

@ -49,21 +49,19 @@ extern "C" {
* Structure associated w/ ompi_proc_t that contains data specific
* to the PML. Note that this name is not PML specific.
*/
struct mca_pml_proc_t {
opal_list_item_t super;
ompi_proc_t *proc_ompi; /**< back-pointer to ompi_proc_t */
opal_mutex_t proc_lock; /**< lock to protect against concurrent access */
mca_ptl_proc_t proc_ptl_first; /**< ptl for the first fragment */
struct mca_pml_uniq_proc_t {
mca_pml_proc_t base;
mca_ptl_proc_t proc_ptl_first; /**< ptl for the first fragment */
#if PML_UNIQ_ACCEPT_NEXT_PTL
mca_ptl_proc_t proc_ptl_next; /**< ptl for the remaining fragments */
mca_ptl_proc_t proc_ptl_next; /**< ptl for the remaining fragments */
#endif /* PML_UNIQ_ACCEPT_NEXT_PTL */
uint32_t proc_ptl_flags; /**< aggregate ptl flags */
uint32_t proc_ptl_flags; /**< aggregate ptl flags */
};
typedef struct mca_pml_proc_t mca_pml_proc_t;
typedef struct mca_pml_uniq_proc_t mca_pml_uniq_proc_t;
OMPI_COMP_EXPORT extern opal_class_t mca_pml_uniq_proc_t_class;
typedef struct mca_pml_proc_t mca_pml_uniq_proc_t;
/**
* Return the mca_pml_proc_t instance cached in the communicators local group.
@ -107,7 +105,7 @@ extern "C" {
struct mca_ptl_base_module_t* ptl)
{
ompi_proc_t* proc = comm->c_remote_group->grp_proc_pointers[rank];
mca_pml_proc_t* proc_pml = proc->proc_pml;
mca_pml_uniq_proc_t* proc_pml = (mca_pml_uniq_proc_t*) proc->proc_pml;
if( proc_pml->proc_ptl_first.ptl == ptl )
return proc_pml->proc_ptl_first.ptl_peer;
#if PML_UNIQ_ACCEPT_NEXT_PTL

Просмотреть файл

@ -19,7 +19,7 @@
#include "pml_uniq_ptl.h"
static void mca_pml_uniq_ptl_construct(mca_pml_uniq_ptl_t* ptl)
static void mca_pml_base_ptl_construct(mca_pml_base_ptl_t* ptl)
{
OBJ_CONSTRUCT(&ptl->ptl_cache, opal_list_t);
OBJ_CONSTRUCT(&ptl->ptl_cache_lock, opal_mutex_t);
@ -28,16 +28,16 @@ static void mca_pml_uniq_ptl_construct(mca_pml_uniq_ptl_t* ptl)
ptl->ptl_cache_alloc = 0;
}
static void mca_pml_uniq_ptl_destruct(mca_pml_uniq_ptl_t* ptl)
static void mca_pml_base_ptl_destruct(mca_pml_base_ptl_t* ptl)
{
OBJ_DESTRUCT(&ptl->ptl_cache);
OBJ_DESTRUCT(&ptl->ptl_cache_lock);
}
OBJ_CLASS_INSTANCE(
mca_pml_uniq_ptl_t,
mca_pml_base_ptl_t,
opal_list_t,
mca_pml_uniq_ptl_construct,
mca_pml_uniq_ptl_destruct
mca_pml_base_ptl_construct,
mca_pml_base_ptl_destruct
);

Просмотреть файл

@ -25,16 +25,16 @@ extern "C" {
#endif
struct mca_pml_uniq_ptl_t {
struct mca_pml_base_ptl_t {
opal_list_t ptl_cache; /**< cache of send requests */
size_t ptl_cache_size; /**< maximum size of cache */
size_t ptl_cache_alloc; /**< current number of allocated items */
opal_mutex_t ptl_cache_lock; /**< lock for queue access */
struct mca_ptl_base_module_t* ptl; /**< back pointer to ptl */
};
typedef struct mca_pml_uniq_ptl_t mca_pml_uniq_ptl_t;
typedef struct mca_pml_base_ptl_t mca_pml_base_ptl_t;
OBJ_CLASS_DECLARATION(mca_pml_uniq_ptl_t);
OBJ_CLASS_DECLARATION(mca_pml_base_ptl_t);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -90,7 +90,7 @@ int mca_pml_uniq_send_request_schedule(mca_ptl_base_send_request_t* req)
* the scheduling logic once for every call.
*/
if(OPAL_THREAD_ADD32(&req->req_lock,1) == 1) {
mca_pml_proc_t* proc_pml = mca_pml_uniq_proc_lookup_remote( req->req_send.req_base.req_comm,
mca_pml_uniq_proc_t* proc_pml = mca_pml_uniq_proc_lookup_remote( req->req_send.req_base.req_comm,
req->req_send.req_base.req_peer );
#if PML_UNIQ_ACCEPT_NEXT_PTL

Просмотреть файл

@ -40,13 +40,14 @@ OBJ_CLASS_DECLARATION(mca_pml_uniq_send_request_t);
sendreq, \
rc) \
{ \
mca_pml_proc_t *proc = mca_pml_uniq_proc_lookup_remote(comm,dst); \
mca_pml_uniq_ptl_t* ptl_base; \
mca_pml_uniq_proc_t *proc = \
(mca_pml_uniq_proc_t*) mca_pml_uniq_proc_lookup_remote(comm,dst); \
mca_pml_base_ptl_t* ptl_base; \
\
if(NULL == proc) { \
return OMPI_ERR_OUT_OF_RESOURCE; \
} \
ptl_base = (mca_pml_uniq_ptl_t*)proc->proc_ptl_first.ptl_base; \
ptl_base = proc->proc_ptl_first.ptl_base; \
/* \
* check to see if there is a cache of send requests associated with \
* this ptl - if so try the allocation from there. \
@ -119,7 +120,7 @@ OBJ_CLASS_DECLARATION(mca_pml_uniq_send_request_t);
#define MCA_PML_UNIQ_SEND_REQUEST_RETURN(sendreq) \
{ \
mca_ptl_base_module_t* ptl = (sendreq)->req_ptl; \
mca_pml_uniq_ptl_t* ptl_base = (mca_pml_uniq_ptl_t*)ptl->ptl_base; \
mca_pml_base_ptl_t* ptl_base = ptl->ptl_base; \
\
/* Let the base handle the reference counts */ \
MCA_PML_BASE_SEND_REQUEST_FINI( &((sendreq)->req_send) ); \