1
1
- remove any ptl dependency from mpi/pml interface
- beginnings of a new pml
- new Bit Mover Interface (BMI) framework

This commit was SVN r5834.
Этот коммит содержится в:
Tim Woodall 2005-05-23 22:06:50 +00:00
родитель 905bf85295
Коммит 37b43eaccf
89 изменённых файлов: 6885 добавлений и 141 удалений

Просмотреть файл

@ -157,7 +157,7 @@ unset msg
# The list of MCA types (it's fixed)
AC_MSG_CHECKING([for MCA types])
found_types="common allocator coll errmgr gpr io iof mpool ns oob pls pml ptl ras rds rmaps rmgr rml schema soh topo"
found_types="common allocator bmi coll errmgr gpr io iof mpool ns oob pls pml ptl ras rds rmaps rmgr rml schema soh topo"
AC_MSG_RESULT([$found_types])
# Get the list of all the non-configure MCA components that were found by
@ -572,6 +572,11 @@ AC_SUBST(MCA_mpool_STATIC_SUBDIRS)
AC_SUBST(MCA_mpool_DSO_SUBDIRS)
AC_SUBST(MCA_mpool_STATIC_LTLIBS)
AC_SUBST(MCA_bmi_ALL_SUBDIRS)
AC_SUBST(MCA_bmi_STATIC_SUBDIRS)
AC_SUBST(MCA_bmi_DSO_SUBDIRS)
AC_SUBST(MCA_bmi_STATIC_LTLIBS)
AC_SUBST(MCA_pml_ALL_SUBDIRS)
AC_SUBST(MCA_pml_STATIC_SUBDIRS)
AC_SUBST(MCA_pml_DSO_SUBDIRS)

Просмотреть файл

@ -1513,6 +1513,8 @@ AC_CONFIG_FILES([
src/mca/allocator/Makefile
src/mca/allocator/base/Makefile
src/mca/bmi/Makefile
src/mca/bmi/base/Makefile
src/mca/coll/Makefile
src/mca/coll/base/Makefile
src/mca/io/Makefile

Просмотреть файл

@ -142,6 +142,7 @@ libmpi_la_LIBADD = \
info/libinfo.la \
mca/base/libmca_base.la \
mca/allocator/base/libmca_allocator_base.la $(MCA_allocator_STATIC_LTLIBS) \
mca/bmi/base/libmca_bmi_base.la $(MCA_bmi_STATIC_LTLIBS) \
mca/coll/base/libmca_coll_base.la $(MCA_coll_STATIC_LTLIBS) \
$(MCA_common_STATIC_LTLIBS) \
mca/errmgr/base/libmca_errmgr_base.la \

Просмотреть файл

@ -17,6 +17,7 @@
#include "ompi_config.h"
#include "class/ompi_free_list.h"
#include "include/sys/cache.h"
static void ompi_free_list_construct(ompi_free_list_t* fl);
@ -73,16 +74,23 @@ int ompi_free_list_grow(ompi_free_list_t* flist, size_t num_elements)
{
unsigned char* ptr;
size_t i;
size_t mod;
if (flist->fl_max_to_alloc > 0 && flist->fl_num_allocated + num_elements > flist->fl_max_to_alloc)
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
if (NULL != flist->fl_mpool)
ptr = (unsigned char*)flist->fl_mpool->mpool_alloc(num_elements * flist->fl_elem_size, 0);
ptr = (unsigned char*)flist->fl_mpool->mpool_alloc((num_elements * flist->fl_elem_size) + CACHE_LINE_SIZE, 0);
else
ptr = (unsigned char *)malloc(num_elements * flist->fl_elem_size);
ptr = (unsigned char *)malloc((num_elements * flist->fl_elem_size) + CACHE_LINE_SIZE);
if(NULL == ptr)
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
mod = (unsigned long)ptr % CACHE_LINE_SIZE;
if(mod != 0) {
ptr += (CACHE_LINE_SIZE - mod);
}
for(i=0; i<num_elements; i++) {
ompi_list_item_t* item = (ompi_list_item_t*)ptr;
if (NULL != flist->fl_elem_class) {

Просмотреть файл

@ -446,7 +446,11 @@ static inline ompi_object_t *ompi_obj_new(size_t size, ompi_class_t * cls)
*/
/*
 * Adjust an object's reference count by 'inc' and return the resulting
 * count.  When thread support is compiled in, the increment itself is
 * performed atomically.
 */
static inline int ompi_obj_update(ompi_object_t *object, int inc)
{
#if OMPI_HAVE_THREAD_SUPPORT
/* atomic read-modify-write of the reference count */
ompi_atomic_add(&(object->obj_reference_count), inc );
#else
object->obj_reference_count += inc;
#endif
/* NOTE(review): in the threaded case the count is re-read here without
   atomicity, so a concurrent update can make the returned value stale
   relative to this caller's add -- confirm callers treat the result only
   as a hint (e.g. a zero test), not an exact count. */
return object->obj_reference_count;
}

Просмотреть файл

@ -84,7 +84,6 @@ int ompi_comm_init(void)
ompi_mpi_comm_world.c_cube_dim = ompi_cube_dim(size);
ompi_mpi_comm_world.error_handler = &ompi_mpi_errors_are_fatal;
OBJ_RETAIN( &ompi_mpi_errors_are_fatal );
MCA_PML_CALL(add_comm(&ompi_mpi_comm_world));
OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_world);
ompi_pointer_array_set_item (&ompi_mpi_communicators, 0, &ompi_mpi_comm_world);
@ -114,7 +113,6 @@ int ompi_comm_init(void)
ompi_mpi_comm_self.c_remote_group = group;
ompi_mpi_comm_self.error_handler = &ompi_mpi_errors_are_fatal;
OBJ_RETAIN( &ompi_mpi_errors_are_fatal );
MCA_PML_CALL(add_comm(&ompi_mpi_comm_self));
OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_self);
ompi_pointer_array_set_item (&ompi_mpi_communicators, 1, &ompi_mpi_comm_self);

Просмотреть файл

@ -29,6 +29,7 @@ SUBDIRS = \
gpr \
mpool \
ns \
bmi \
oob \
pls \
pml \

Просмотреть файл

@ -1,4 +1,3 @@
# -*- makefile -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
@ -15,18 +14,20 @@
# $HEADER$
#
sources += \
src/ptl_tcp.c \
src/ptl_tcp.h \
src/ptl_tcp_addr.h \
src/ptl_tcp_component.c \
src/ptl_tcp_peer.c \
src/ptl_tcp_peer.h \
src/ptl_tcp_proc.c \
src/ptl_tcp_proc.h \
src/ptl_tcp_recvfrag.c \
src/ptl_tcp_recvfrag.h \
src/ptl_tcp_sendfrag.c \
src/ptl_tcp_sendfrag.h \
src/ptl_tcp_sendreq.c \
src/ptl_tcp_sendreq.h
include $(top_srcdir)/config/Makefile.options
SUBDIRS = base $(MCA_bmi_STATIC_SUBDIRS)
DIST_SUBDIRS = base $(MCA_bmi_ALL_SUBDIRS)
# Source code files
headers = bmi.h
# Conditionally install the header files
if WANT_INSTALL_HEADERS
ompidir = $(includedir)/openmpi/mca/bmi
ompi_HEADERS = $(headers)
else
ompidir = $(includedir)
endif

44
src/mca/bmi/base/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,44 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
include $(top_srcdir)/config/Makefile.options
noinst_LTLIBRARIES = libmca_bmi_base.la
# For VPATH builds, have to specify where static-modules.h will be found
AM_CPPFLAGS = -I$(top_builddir)/src
# Source code files
headers = \
base.h
libmca_bmi_base_la_SOURCES = \
$(headers) \
bmi_base_close.c \
bmi_base_open.c \
bmi_base_select.c
# Conditionally install the header files
if WANT_INSTALL_HEADERS
ompidir = $(includedir)/openmpi/mca/bmi/base
ompi_HEADERS = $(headers)
else
ompidir = $(includedir)
endif

60
src/mca/bmi/base/base.h Обычный файл
Просмотреть файл

@ -0,0 +1,60 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_BMI_BASE_H
#define MCA_BMI_BASE_H
#include "ompi_config.h"
#include "class/ompi_list.h"
#include "mca/mca.h"
#include "mca/bmi/bmi.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
 * A (component, module) pair that survived selection.  Instances are
 * heap-allocated by mca_bmi_base_select() and linked into the global
 * mca_bmi_base_modules_initialized list via the embedded list item.
 */
struct mca_bmi_base_selected_module_t {
ompi_list_item_t super; /**< list linkage (must be first) */
mca_bmi_base_component_t *bmi_component; /**< component that produced the module */
mca_bmi_base_module_t *bmi_module; /**< the initialized BMI module */
};
typedef struct mca_bmi_base_selected_module_t mca_bmi_base_selected_module_t;
/*
* Global functions for MCA: overall BMI open and close
*/
OMPI_DECLSPEC int mca_bmi_base_open(void);
OMPI_DECLSPEC int mca_bmi_base_select(bool enable_progress_threads, bool enable_mpi_threads);
OMPI_DECLSPEC int mca_bmi_base_close(void);
/*
* Globals
*/
OMPI_DECLSPEC extern int mca_bmi_base_output;
OMPI_DECLSPEC extern char* mca_bmi_base_include;
OMPI_DECLSPEC extern char* mca_bmi_base_exclude;
OMPI_DECLSPEC extern ompi_list_t mca_bmi_base_components_opened;
OMPI_DECLSPEC extern ompi_list_t mca_bmi_base_modules_initialized;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_BMI_BASE_H */

72
src/mca/bmi/base/bmi_base_close.c Обычный файл
Просмотреть файл

@ -0,0 +1,72 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include "include/constants.h"
#include "event/event.h"
#include "mca/mca.h"
#include "mca/base/base.h"
#include "mca/pml/pml.h"
#include "mca/bmi/bmi.h"
#include "mca/bmi/base/base.h"
/*
 * Shut down the BMI framework: finalize every selected module, close all
 * still-opened components, and release framework-level state.
 *
 * @return OMPI_SUCCESS (finalize errors are deliberately ignored).
 */
int mca_bmi_base_close(void)
{
    ompi_list_item_t *item;
    mca_bmi_base_selected_module_t *sm;

    /* disable event processing while cleaning up bmis */
    ompi_event_disable();

    /* Finalize all the bmi components and free their list items */
    for (item = ompi_list_remove_first(&mca_bmi_base_modules_initialized);
         NULL != item;
         item = ompi_list_remove_first(&mca_bmi_base_modules_initialized)) {
        sm = (mca_bmi_base_selected_module_t *) item;

        /* Blatantly ignore the return code (what would we do to recover,
           anyway?  This component is going away, so errors don't matter
           anymore) */
        sm->bmi_module->bmi_finalize(sm->bmi_module);
        free(sm);
    }

    /* Close all remaining opened components (may be one if this is a
       OMPI RTE program, or [possibly] multiple if this is ompi_info) */
    if (0 != ompi_list_get_size(&mca_bmi_base_components_opened)) {
        mca_base_components_close(mca_bmi_base_output,
                                  &mca_bmi_base_components_opened, NULL);
    }

    /* cleanup: free(NULL) is a no-op, so no guard is needed.  Reset the
       globals so a stale pointer can never be dereferenced after close. */
    free(mca_bmi_base_include);
    mca_bmi_base_include = NULL;
    free(mca_bmi_base_exclude);
    mca_bmi_base_exclude = NULL;

    /* restore event processing */
    ompi_event_enable();

    /* All done */
    return OMPI_SUCCESS;
}

101
src/mca/bmi/base/bmi_base_open.c Обычный файл
Просмотреть файл

@ -0,0 +1,101 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include "mca/mca.h"
#include "mca/base/base.h"
#include "mca/base/mca_base_param.h"
#include "mca/pml/pml.h"
#include "mca/bmi/bmi.h"
#include "mca/bmi/base/base.h"
/*
* mca_bmi_base_descriptor_t
*/
/* Zero every payload field of a freshly constructed descriptor; the
   list-item "super" is initialized by the parent class. */
static void mca_bmi_base_descriptor_constructor(mca_bmi_base_descriptor_t* des)
{
    des->des_flags = 0;
    des->des_cbdata = NULL;
    des->des_cbfunc = NULL;
    des->des_dst_cnt = 0;
    des->des_dst = NULL;
    des->des_src_cnt = 0;
    des->des_src = NULL;
}

/* Descriptors hold no resources of their own, so nothing to release. */
static void mca_bmi_base_descriptor_destructor(mca_bmi_base_descriptor_t* des)
{
}

OBJ_CLASS_INSTANCE(
    mca_bmi_base_descriptor_t,
    ompi_list_item_t,
    mca_bmi_base_descriptor_constructor,
    mca_bmi_base_descriptor_destructor);
/*
* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
* component's public mca_base_component_t struct.
*/
#include "mca/bmi/base/static-components.h"
/*
* Global variables
*/
int mca_bmi_base_output = -1;
char* mca_bmi_base_include = NULL;
char* mca_bmi_base_exclude = NULL;
ompi_list_t mca_bmi_base_components_opened;
ompi_list_t mca_bmi_base_modules_initialized;
/**
* Function for finding and opening either all MCA components, or the one
* that was specifically requested via a MCA parameter.
*/
int mca_bmi_base_open(void)
{
/* Open up all available components */
if (OMPI_SUCCESS !=
mca_base_components_open("bmi", 0, mca_bmi_base_static_components,
&mca_bmi_base_components_opened, true)) {
return OMPI_ERROR;
}
/* Initialize the list so that in mca_bmi_base_close(), we can
iterate over it (even if it's empty, as in the case of
ompi_info) */
OBJ_CONSTRUCT(&mca_bmi_base_modules_initialized, ompi_list_t);
/* register parameters: the "bmi_base_include" / "bmi_base_exclude" MCA
   params select which components mca_bmi_base_select() will consider.
   The lookups store malloc'ed strings in the globals, which are released
   in mca_bmi_base_close(). */
mca_base_param_lookup_string(
mca_base_param_register_string("bmi","base","include",NULL,NULL), &mca_bmi_base_include);
mca_base_param_lookup_string(
mca_base_param_register_string("bmi","base","exclude",NULL,NULL), &mca_bmi_base_exclude);
/* All done */
return OMPI_SUCCESS;
}

146
src/mca/bmi/base/bmi_base_select.c Обычный файл
Просмотреть файл

@ -0,0 +1,146 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "util/argv.h"
#include "runtime/runtime.h"
#include "mca/mca.h"
#include "mca/base/base.h"
#include "mca/pml/pml.h"
#include "mca/bmi/bmi.h"
#include "mca/bmi/base/base.h"
/**
* Function for weeding out bmi components that don't want to run.
*
* Call the init function on all available components to find out if
* they want to run. Select all components that don't fail. Failing
* components will be closed and unloaded. The selected modules will
* be returned to the caller in a ompi_list_t.
*/
/* Return true if 'name' appears in the NULL-terminated argv list.
   A NULL list never matches. */
static bool mca_bmi_base_argv_contains(char **argv, const char *name)
{
    while (NULL != argv && NULL != *argv) {
        if (0 == strcmp(name, *argv)) {
            return true;
        }
        argv++;
    }
    return false;
}

/*
 * Call the init function on every opened BMI component; keep the modules
 * of each component that initializes successfully, and unload the rest.
 * Selected (component, module) pairs are appended to
 * mca_bmi_base_modules_initialized.
 *
 * @param enable_progress_threads (IN) component may run a progress thread
 * @param enable_mpi_threads      (IN) MPI_THREAD_MULTIPLE support requested
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE on allocation failure
 *         (aborts via orte_abort() if no component at all is usable).
 */
int mca_bmi_base_select(bool enable_progress_threads,
                        bool enable_mpi_threads)
{
    int i, num_bmis;
    ompi_list_item_t *item;
    mca_base_component_list_item_t *cli;
    mca_bmi_base_component_t *component;
    mca_bmi_base_module_t **modules;
    mca_bmi_base_selected_module_t *sm;
    char** include = ompi_argv_split(mca_bmi_base_include, ',');
    char** exclude = ompi_argv_split(mca_bmi_base_exclude, ',');

    /* Traverse the list of opened modules; call their init
       functions. */
    item = ompi_list_get_first(&mca_bmi_base_components_opened);
    while(item != ompi_list_get_end(&mca_bmi_base_components_opened)) {
        ompi_list_item_t *next = ompi_list_get_next(item);
        cli = (mca_base_component_list_item_t *) item;
        component = (mca_bmi_base_component_t *) cli->cli_component;

        /* if there is an include list - item must be in the list to be
           included; otherwise the exclude list (if any) must not name it */
        if ( NULL != include ) {
            if (!mca_bmi_base_argv_contains(include,
                    component->bmi_version.mca_component_name)) {
                item = next;
                continue;
            }
        } else if (NULL != exclude &&
                   mca_bmi_base_argv_contains(exclude,
                       component->bmi_version.mca_component_name)) {
            item = next;
            continue;
        }

        ompi_output_verbose(10, mca_bmi_base_output,
                            "select: initializing %s component %s",
                            component->bmi_version.mca_type_name,
                            component->bmi_version.mca_component_name);
        if (NULL == component->bmi_init) {
            ompi_output_verbose(10, mca_bmi_base_output,
                                "select: no init function; ignoring component");
        } else {
            modules = component->bmi_init(&num_bmis, enable_progress_threads,
                                          enable_mpi_threads);

            /* If the component didn't initialize, remove it from the opened
               list and remove it from the component repository */
            if (NULL == modules) {
                ompi_output_verbose(10, mca_bmi_base_output,
                                    "select: init returned failure");
                ompi_output_verbose(10, mca_bmi_base_output,
                                    "select: module %s unloaded",
                                    component->bmi_version.mca_component_name);
                mca_base_component_repository_release((mca_base_component_t *) component);
                ompi_list_remove_item(&mca_bmi_base_components_opened, item);
            }
            /* Otherwise, it initialized properly.  Save it. */
            else {
                ompi_output_verbose(10, mca_bmi_base_output,
                                    "select: init returned success");
                for (i = 0; i < num_bmis; ++i) {
                    sm = malloc(sizeof(mca_bmi_base_selected_module_t));
                    if (NULL == sm) {
                        /* fix: release the temporaries before the error
                           return (they were previously leaked) */
                        free(modules);
                        ompi_argv_free(include);
                        ompi_argv_free(exclude);
                        return OMPI_ERR_OUT_OF_RESOURCE;
                    }
                    /* only the embedded list item needs construction */
                    OBJ_CONSTRUCT(sm, ompi_list_item_t);
                    sm->bmi_component = component;
                    sm->bmi_module = modules[i];
                    ompi_list_append(&mca_bmi_base_modules_initialized,
                                     (ompi_list_item_t*) sm);
                }
                free(modules);
            }
        }
        item = next;
    }

    /* fix: the split include/exclude lists were previously leaked */
    ompi_argv_free(include);
    ompi_argv_free(exclude);

    /* Finished querying all components.  Check for the bozo case. */
    if (0 == ompi_list_get_size(&mca_bmi_base_modules_initialized)) {
        /* JMS Replace with show_help */
        orte_abort(1, "No bmi components available. This shouldn't happen.");
    }
    return OMPI_SUCCESS;
}

466
src/mca/bmi/bmi.h Обычный файл
Просмотреть файл

@ -0,0 +1,466 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Bit Mover Interface (BMI)
*
*
* BMI Initialization:
*
* During library initialization, all available BMI components are
* loaded and opened via their mca_base_open_component_fn_t
* function. The BMI open function should register any mca parameters
* used to tune/adjust the behaviour of the BMI (mca_base_param_register_int(),
* mca_base_param_register_string()). Note that the open function may fail
* if the resources (e.g. shared libraries, etc) required by the network
* transport are not available.
*
* The mca_bmi_base_component_init_fn_t() is then called for each of the
* components that are successfully opened. The component init function may
* return either:
*
* (1) a NULL list of BMI modules if the transport is not available,
* (2) a list containing a single BMI module, where the BMI provides
* a layer of abstraction over multiple physical devices (e.g. NICs),
* (3) a list containing multiple BMI modules where each BMI module
* corresponds to a single physical device.
*
* During module initialization, the module should post any addressing
* information required by its peers. An example would be the TCP
* listen port opened by the TCP module for incoming connection
* requests. This information is published to peers via the
* mca_base_modex_send() interface. Note that peer information is not
* guaranteed to be available via mca_base_modex_recv() during the
* module's init function. However, it will be available during
* BMI selection (mca_bmi_base_add_proc_fn_t()).
*
* BMI Selection:
*
* The upper layer builds an ordered list of the available BMI modules sorted
* by their exclusivity ranking. This is a relative ranking that is used
* to determine the set of BMIs that may be used to reach a given destination.
* During startup the BMI modules are queried via their
* mca_bmi_base_add_proc_fn_t() to determine if they are able to reach
* a given destination. The BMI module with the highest ranking that
* returns success is selected. Subsequent BMI modules are selected only
* if they have the same exclusivity ranking.
*
* An example of how this might be used:
*
* BMI Exclusivity Comments
* -------- ----------- ------------------
* LO 100 Selected exclusively for local process
* SM 50 Selected exclusively for other processes on host
* IB 0 Selected based on network reachability
* IB 0 Selected based on network reachability
* TCP 0 Selected based on network reachability
* TCP 0 Selected based on network reachability
*
* When a BMI module is selected, it may choose to optionally return a
* pointer to an mca_bmi_base_endpoint_t data structure to the PML.
* This pointer is treated as an opaque handle by the PML and is
* returned to the BMI on subsequent data transfer calls to the
* corresponding destination process. The actual contents of the
* data structure are defined on a per BMI basis, and may be used to
* cache addressing or connection information, such as a TCP socket
* or IB queue pair.
*
* Progress:
*
* By default, the library provides for polling based progress of outstanding
* requests. The BMI component exports an interface function (bmi_progress)
* that is called in a polling mode by the PML during calls into the MPI
* library. Note that the bmi_progress() function is called on the BMI component
* rather than each BMI module. This implies that the BMI author is responsible
* for iterating over the pending operations in each of the BMI modules associated
* with the component.
*
* On platforms where threading support is provided, the library provides the
* option of building with asynchronous threaded progress. In this case, the BMI
* author is responsible for providing a thread to progress pending operations.
* A thread is associated with the BMI component/module such that transport specific
* functionality/APIs may be used to block the thread until a pending operation
* completes. This thread MUST NOT poll for completion as this would oversubscribe
* the CPU.
*
* Note that in the threaded case the PML may choose to use a hybrid approach,
* such that polling is implemented from the user thread for a fixed number of
* cycles before relying on the background thread(s) to complete requests. If
* possible the BMI should support the use of both modes concurrently.
*
*/
#include "mca/mca.h"
#ifndef MCA_BMI_H
#define MCA_BMI_H
#include "include/types.h"
/*
* BMI types
*/
struct mca_bmi_base_module_t;
struct mca_bmi_base_endpoint_t;
struct mca_bmi_base_descriptor_t;
/* send/recv operations require tag matching */
typedef uint8_t mca_bmi_base_tag_t;
/* reserved tag values */
#define MCA_BMI_TAG_BMI 0
#define MCA_BMI_TAG_PML 1
#define MCA_BMI_TAG_USR 2
typedef void (*mca_bmi_base_completion_fn_t)(
struct mca_bmi_base_module_t*,
struct mca_bmi_base_endpoint_t*,
struct mca_bmi_base_descriptor_t*,
int status);
/**
* Describes a region/segment of memory that is addressable
* by an BMI.
*/
struct mca_bmi_base_segment_t {
ompi_ptr_t seg_addr; /**< base address of the memory region */
uint32_t seg_len; /**< length of the region in bytes */
union {
uint32_t key32[2]; /**< key viewed as two 32-bit words */
uint64_t key64; /**< key viewed as a single 64-bit word */
uint8_t key8[8]; /**< key viewed as raw bytes */
} seg_key; /**< opaque per-transport key; interpretation is defined by each BMI */
};
typedef struct mca_bmi_base_segment_t mca_bmi_base_segment_t;
/**
* A descriptor that holds the parameters to a send/put/get
* operation along w/ a callback function that is called on
* completion of the request.
*/
struct mca_bmi_base_descriptor_t {
ompi_list_item_t super; /**< list linkage so descriptors can be queued */
mca_bmi_base_segment_t *des_src; /**< array of source segments */
size_t des_src_cnt; /**< number of entries in des_src */
mca_bmi_base_segment_t *des_dst; /**< array of destination segments */
size_t des_dst_cnt; /**< number of entries in des_dst */
mca_bmi_base_completion_fn_t des_cbfunc; /**< invoked when the operation completes */
void* des_cbdata; /**< opaque caller state stored with the descriptor */
int32_t des_flags; /**< MCA_BMI_DES_FLAGS_* bits */
};
typedef struct mca_bmi_base_descriptor_t mca_bmi_base_descriptor_t;
OBJ_CLASS_DECLARATION(mca_bmi_base_descriptor_t);
#define MCA_BMI_DES_FLAGS_BMI 0x0001
#define MCA_BMI_DES_FLAGS_PML 0x0002
/*
* BMI component interface functions and datatype.
*/
/**
* MCA->BMI Initializes the BMI component and creates specific BMI
* module(s).
*
* @param num_bmis (OUT) Returns the number of bmi modules created, or 0
* if the transport is not available.
*
* @param enable_progress_threads (IN) Whether this component is
* allowed to run a hidden/progress thread or not.
*
* @param enable_mpi_threads (IN) Whether support for multiple MPI
* threads is enabled or not (i.e., MPI_THREAD_MULTIPLE), which
* indicates whether multiple threads may invoke this component
* simultaneously or not.
*
* @return Array of pointers to BMI modules, or NULL if the transport
* is not available.
*
* During component initialization, the BMI component should discover
* the physical devices that are available for the given transport,
* and create a BMI module to represent each device. Any addressing
* information required by peers to reach the device should be published
* during this function via the mca_base_modex_send() interface.
*
*/
typedef struct mca_bmi_base_module_t** (*mca_bmi_base_component_init_fn_t)(
int *num_bmis,
bool enable_progress_threads,
bool enable_mpi_threads
);
/**
* MCA->BMI Called to progress outstanding requests for
* non-threaded polling environments.
*
* @param tstamp Current time.
* @return OMPI_SUCCESS or error code on failure.
*/
typedef int (*mca_bmi_base_component_progress_fn_t)(void);
/**
* BMI component descriptor. Contains component version information
* and component open/close/init functions.
*/
struct mca_bmi_base_component_1_0_0_t {
mca_base_component_t bmi_version; /**< MCA base component version/name information */
mca_base_component_data_1_0_0_t bmi_data; /**< MCA base component metadata */
mca_bmi_base_component_init_fn_t bmi_init; /**< creates and returns this component's module(s) */
mca_bmi_base_component_progress_fn_t bmi_progress; /**< polled to progress outstanding operations */
};
typedef struct mca_bmi_base_component_1_0_0_t mca_bmi_base_component_1_0_0_t;
/* convenience alias for the current (v1.0.0) component interface */
typedef struct mca_bmi_base_component_1_0_0_t mca_bmi_base_component_t;
/*
* BMI module interface functions and datatype.
*/
/**
* MCA->BMI Clean up any resources held by BMI module
* before the module is unloaded.
*
* @param bmi (IN) BMI module.
*
* Prior to unloading a BMI module, the MCA framework will call
* the BMI finalize method of the module. Any resources held by
* the BMI should be released and if required the memory corresponding
* to the BMI module freed.
*
*/
typedef int (*mca_bmi_base_module_finalize_fn_t)(
struct mca_bmi_base_module_t* bmi
);
/**
* PML->BMI notification of change in the process list.
*
* @param bmi (IN) BMI module
* @param nprocs (IN) Number of processes
* @param procs (IN) Set of processes
* @param endpoint (OUT) Set of (optional) mca_bmi_base_endpoint_t structures by BMI.
* @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this BMI.
* @return OMPI_SUCCESS or error status on failure.
*
* The mca_bmi_base_module_add_procs_fn_t() is called by the PML to
* determine the set of BMIs that should be used to reach each process.
* Any addressing information exported by the peer via the mca_base_modex_send()
* function should be available during this call via the corresponding
* mca_base_modex_recv() function. The BMI may utilize this information to
* determine reachability of each peer process.
*
* For each process that is reachable by the BMI, the bit corresponding to the index
* into the proc array (nprocs) should be set in the reachable bitmask. The PML
* provides the BMI the option to return a pointer to a data structure defined
* by the BMI that is returned to the BMI on subsequent calls to the BMI data
* transfer functions (e.g bmi_send). This may be used by the BMI to cache any addressing
* or connection information (e.g. TCP socket, IP queue pair).
*/
typedef int (*mca_bmi_base_module_add_procs_fn_t)(
struct mca_bmi_base_module_t* bmi,
size_t nprocs,
struct ompi_proc_t** procs,
struct mca_bmi_base_endpoint_t** endpoints,
struct ompi_bitmap_t* reachable
);
/**
* Notification of change to the process list.
*
* @param bmi (IN) BMI module
* @param nprocs (IN) Number of processes
* @param proc (IN) Set of processes
* @param peer (IN) Set of peer addressing information.
* @return Status indicating if cleanup was successful
*
* When the process list changes, the PML notifies the BMI of the
* change, to provide the opportunity to cleanup or release any
* resources associated with the peer.
*/
typedef int (*mca_bmi_base_module_del_procs_fn_t)(
struct mca_bmi_base_module_t* bmi,
size_t nprocs,
struct ompi_proc_t** procs,
struct mca_bmi_base_endpoint_t**
);
/**
* Callback function that is called asynchronously on receipt
* of data from the transport layer.
*/
typedef void (*mca_bmi_base_module_recv_cb_fn_t)(
struct mca_bmi_base_module_t* bmi,
mca_bmi_base_tag_t tag,
mca_bmi_base_descriptor_t* descriptor,
void* cbdata
);
/**
* Register a callback function that is called on receipt
* of a fragment.
*
* @param bmi (IN) BMI module
* @return Status indicating if cleanup was successful
*
* When the process list changes, the PML notifies the BMI of the
* change, to provide the opportunity to cleanup or release any
* resources associated with the peer.
*/
typedef int (*mca_bmi_base_module_register_fn_t)(
struct mca_bmi_base_module_t* bmi,
mca_bmi_base_tag_t tag,
mca_bmi_base_module_recv_cb_fn_t cbfunc,
void* cbdata
);
/**
* Allocate a segment.
*
* @param bmi (IN) BMI module
* @param size (IN) Request segment size.
*/
typedef mca_bmi_base_descriptor_t* (*mca_bmi_base_module_alloc_fn_t)(
struct mca_bmi_base_module_t* bmi,
size_t size
);
/**
* Return a segment allocated by this BMI.
*
* @param bmi (IN) BMI module
* @param segment (IN) Allocated segment.
*/
typedef int (*mca_bmi_base_module_free_fn_t)(
struct mca_bmi_base_module_t* bmi,
mca_bmi_base_descriptor_t* descriptor
);
/**
* Pack data and return a descriptor that can be
* used for send/put.
*
* @param bmi (IN) BMI module
* @param peer (IN) BMI peer addressing
*/
typedef struct mca_bmi_base_descriptor_t* (*mca_bmi_base_module_pack_fn_t)(
struct mca_bmi_base_module_t* bmi,
struct mca_bmi_base_endpoint_t* peer,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size
);
/**
* Initiate a send to the peer.
*
* @param bmi (IN) BMI module
* @param peer (IN) BMI peer addressing
*/
typedef int (*mca_bmi_base_module_send_fn_t)(
struct mca_bmi_base_module_t* bmi,
struct mca_bmi_base_endpoint_t* endpoint,
struct mca_bmi_base_descriptor_t* descriptor,
mca_bmi_base_tag_t tag
);
/**
* Initiate a put to the peer.
*
* @param bmi (IN) BMI module
* @param peer (IN) BMI peer addressing
*/
typedef int (*mca_bmi_base_module_put_fn_t)(
struct mca_bmi_base_module_t* bmi,
struct mca_bmi_base_endpoint_t* peer,
struct mca_bmi_base_descriptor_t* descriptor
);
/**
* PML->BMI Initiate a get from a peer.
*
* @param bmi (IN) BMI module
* @param peer (IN) BMI peer addressing
*
*/
typedef int (*mca_bmi_base_module_get_fn_t)(
struct mca_bmi_base_module_t* bmi,
struct mca_bmi_base_endpoint_t* endpoint,
struct mca_bmi_base_descriptor_t* descriptor
);
/**
* BMI module interface functions and attributes.
*/
struct mca_bmi_base_module_t {
/* BMI common attributes */
mca_bmi_base_component_t* bmi_component; /**< pointer back to the BMI component structure */
size_t bmi_first_frag_size; /**< maximum size of first fragment -- eager send */
size_t bmi_min_frag_size; /**< threshold below which the BMI will not fragment */
size_t bmi_max_frag_size; /**< maximum fragment size supported by the BMI */
uint32_t bmi_exclusivity; /**< indicates this BMI should be used exclusively */
uint32_t bmi_latency; /**< relative ranking of latency used to prioritize bmis */
uint32_t bmi_bandwidth; /**< bandwidth (Mbytes/sec) supported by each endpoint */
uint32_t bmi_flags; /**< flags (put/get...) */
/* BMI function table */
mca_bmi_base_module_add_procs_fn_t bmi_add_procs; /**< reachability check / endpoint creation */
mca_bmi_base_module_del_procs_fn_t bmi_del_procs; /**< release per-peer resources */
mca_bmi_base_module_register_fn_t bmi_register; /**< register a per-tag receive callback */
mca_bmi_base_module_finalize_fn_t bmi_finalize; /**< release module resources before unload */
mca_bmi_base_module_alloc_fn_t bmi_alloc; /**< allocate a descriptor/segment */
mca_bmi_base_module_free_fn_t bmi_free; /**< return a descriptor allocated by this BMI */
mca_bmi_base_module_pack_fn_t bmi_pack; /**< pack data into a descriptor for send/put */
mca_bmi_base_module_send_fn_t bmi_send; /**< initiate a tagged send */
mca_bmi_base_module_put_fn_t bmi_put; /**< initiate an RDMA-style put (may be NULL) */
mca_bmi_base_module_get_fn_t bmi_get; /**< initiate an RDMA-style get (may be NULL) */
};
typedef struct mca_bmi_base_module_t mca_bmi_base_module_t;
/*
* Macro for use in modules that are of type bmi v1.0.0
*/
#define MCA_BMI_BASE_VERSION_1_0_0 \
/* coll v1.0 is chained to MCA v1.0 */ \
MCA_BASE_VERSION_1_0_0, \
/* bmi v1.0 */ \
"bmi", 1, 0, 0
#endif /* OMPI_MCA_BMI_H */

0
src/mca/bmi/sm/.ompi_ignore Обычный файл
Просмотреть файл

2
src/mca/bmi/sm/.ompi_unignore Обычный файл
Просмотреть файл

@ -0,0 +1,2 @@
twoodall
gshipman

45
src/mca/bmi/sm/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,45 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Use the top-level Makefile.options
include $(top_ompi_srcdir)/config/Makefile.options
libmca_bmi_sm_la_SOURCES = \
bmi_sm.c \
bmi_sm.h \
bmi_sm_component.c \
bmi_sm_frag.c \
bmi_sm_frag.h
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_bmi_sm_DSO
component_noinst =
component_install = mca_bmi_sm.la
else
component_noinst = libmca_bmi_sm.la
component_install =
endif
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_bmi_sm_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_bmi_sm_la_LDFLAGS = -module -avoid-version

819
src/mca/bmi/sm/bmi_sm.c Обычный файл
Просмотреть файл

@ -0,0 +1,819 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include "threads/mutex.h"
#include "datatype/datatype.h"
#include "include/sys/atomic.h"
#include "util/output.h"
#include "util/if.h"
#include "util/proc_info.h"
#include "util/printf.h"
#include "util/sys_info.h"
#include "class/ompi_fifo.h"
#include "class/ompi_free_list.h"
#include "mca/pml/pml.h"
#include "mca/bmi/bmi.h"
#include "mca/mpool/base/base.h"
#include "mca/common/sm/common_sm_mmap.h"
#include "bmi_sm.h"
#include "bmi_sm_endpoint.h"
#include "bmi_sm_frag.h"
#include "bmi_sm_fifo.h"
/*
 * Two module instances: [0] serves peers whose shared-memory segment is
 * mapped at the same base virtual address (add_procs_same_base_addr),
 * [1] serves peers mapped at a different base address (add_procs).
 * The function-pointer order must match struct mca_bmi_base_module_t:
 * ..., send, put, get.
 */
mca_bmi_sm_t mca_bmi_sm[2] = {
    {
        {
            &mca_bmi_sm_component.super,
            0, /* bmi_first_frag_size */
            0, /* bmi_min_frag_size */
            0, /* bmi_max_frag_size */
            0, /* bmi_exclusivity */
            0, /* bmi_latency */
            0, /* bmi_bandwidth */
            0, /* bmi flags */
            mca_bmi_sm_add_procs_same_base_addr,
            mca_bmi_sm_del_procs,
            mca_bmi_sm_register,
            mca_bmi_sm_finalize,
            mca_bmi_sm_alloc,
            mca_bmi_sm_free,
            mca_bmi_sm_pack,
            mca_bmi_sm_send,
            NULL, /* put */
            NULL  /* get */
        }
    },
    {
        {
            &mca_bmi_sm_component.super,
            0, /* bmi_first_frag_size */
            0, /* bmi_min_frag_size */
            0, /* bmi_max_frag_size */
            0, /* bmi_exclusivity */
            0, /* bmi_latency */
            0, /* bmi_bandwidth */
            0, /* bmi flags */
            mca_bmi_sm_add_procs,
            mca_bmi_sm_del_procs,
            mca_bmi_sm_register,
            mca_bmi_sm_finalize,
            mca_bmi_sm_alloc,
            mca_bmi_sm_free,
            mca_bmi_sm_pack,
            mca_bmi_sm_send,
            NULL, /* put function (was mislabelled "get" in original comment) */
            NULL  /* get function */
        }
    }
};
/* Track information needed to synchronize a Shared Memory BMI module;
 * definition of the global declared extern in bmi_sm.h. */
mca_bmi_sm_module_resource_t mca_bmi_sm_module_resource;
/**
 * First pass of peer setup for the shared memory BMI.  Identifies which of
 * the given procs live on this host (via modex-exchanged hostnames), creates
 * the shared-memory backing file and fifo structures on first invocation,
 * and marks reachable those local peers whose shared-memory segment is
 * mapped at the same base virtual address as ours.  Peers at a different
 * base address are recorded in sm_proc_connect[] and handled later by
 * mca_bmi_sm_add_procs() on the second module instance.
 *
 * @param bmi          (IN)  BMI module (mca_bmi_sm[0])
 * @param nprocs       (IN)  number of procs in the lists below
 * @param procs        (IN)  procs to (possibly) connect to
 * @param peers        (OUT) per-proc endpoint, NULL for non-local procs
 * @param reachability (OUT) bitmap of procs reachable through this module
 * @return OMPI_SUCCESS or error status on failure.
 */
int mca_bmi_sm_add_procs_same_base_addr(
    struct mca_bmi_base_module_t* bmi,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_bmi_base_endpoint_t **peers,
    ompi_bitmap_t* reachability)
{
    int return_code=OMPI_SUCCESS;
    size_t i,j,proc,size,n_to_allocate,length;
    int n_local_procs,cnt,len, my_len;
    mca_bmi_sm_exchange_t **sm_proc_info;
    ompi_proc_t* my_proc; /* pointer to caller's proc structure */
    mca_bmi_sm_t *bmi_sm;
    ompi_fifo_t *my_fifos;
    ompi_fifo_t * volatile *fifo_tmp;
    bool same_sm_base;
    ssize_t diff;
    volatile char **tmp_ptr;

    /* initialization: no peer until proven local */
    for(i=0 ; i < nprocs ; i++ ) {
        peers[i]=NULL;
    }
    bmi_sm=(mca_bmi_sm_t *)bmi;

    /* allocate array to hold setup shared memory from all
     * other procs */
    sm_proc_info=(mca_bmi_sm_exchange_t **)
        malloc(nprocs*sizeof(mca_bmi_sm_exchange_t *));
    if( NULL == sm_proc_info ){
        return_code=OMPI_ERR_OUT_OF_RESOURCE;
        goto CLEANUP;
    }
    mca_bmi_sm_component.sm_proc_connect=(int *) malloc(nprocs*sizeof(int));
    if( NULL == mca_bmi_sm_component.sm_proc_connect ){
        return_code=OMPI_ERR_OUT_OF_RESOURCE;
        goto CLEANUP;
    }

    /* initialize sm_proc_info and sm_proc_connect */
    for(proc=0 ; proc < nprocs ; proc++ ) {
        sm_proc_info[proc]=0;
        mca_bmi_sm_component.sm_proc_connect[proc]=0;
    }

    /* get pointer to my proc structure */
    my_proc=ompi_proc_local();
    if( NULL == my_proc ) {
        return_code=OMPI_ERR_OUT_OF_RESOURCE;
        goto CLEANUP;
    }
    my_len=strlen(orte_system_info.nodename);

    /* Get unique host identifier for each process in the list,
     * and identify procs that are on this host.  Add procs on this
     * host to shared memory reachability list.  Also, get number
     * of local procs in the procs list. */
    n_local_procs=0;
    for( proc=0 ; proc < nprocs; proc++ ) {
        /* check to see if this is me */
        if( my_proc == procs[proc] ) {
            mca_bmi_sm_component.my_smp_rank=n_local_procs;
        }
        /* procs from a different job can never be local */
        if( procs[proc]->proc_name.jobid != my_proc->proc_name.jobid ) {
            continue;
        }
        /* fetch the hostname the peer published at startup */
        return_code = mca_base_modex_recv(
            &mca_bmi_sm_component.super.bmi_version, procs[proc],
            (void**)(&(sm_proc_info[proc])), &size);
        if(return_code != OMPI_SUCCESS) {
            ompi_output(0, "mca_bmi_sm_add_procs: mca_base_modex_recv: failed with return value=%d", return_code);
            goto CLEANUP;
        }
        /* for zero length, just continue - comparison is meaningless */
        if( 0 >= size ) {
            continue;
        }
        /* check to see if this proc is on my host */
        len=strlen((char *)(sm_proc_info[proc]));
        if( len == my_len ) {
            if( 0 == strncmp(orte_system_info.nodename,
                        (char *)(sm_proc_info[proc]),len) ) {
                struct mca_bmi_base_endpoint_t *peer = peers[proc];
#if OMPI_ENABLE_PROGRESS_THREADS == 1
                char path[PATH_MAX];
                /* int flags; */
#endif
                /* initialize the peers information */
                peer = peers[proc]=malloc(sizeof(struct mca_bmi_base_endpoint_t));
                if( NULL == peer ){
                    return_code=OMPI_ERR_OUT_OF_RESOURCE;
                    goto CLEANUP;
                }
                peer->peer_smp_rank=n_local_procs+
                    mca_bmi_sm_component.num_smp_procs;
#if OMPI_ENABLE_PROGRESS_THREADS == 1
                /* open the peer's wake-up fifo for writing */
                sprintf(path, "%s/sm_fifo.%d", orte_process_info.job_session_dir,
                    procs[proc]->proc_name.vpid);
                peer->fifo_fd = open(path, O_WRONLY);
                if(peer->fifo_fd < 0) {
                    /* NOTE(review): return_code is not set before this goto,
                     * so the open() failure is reported as OMPI_SUCCESS —
                     * looks like a bug; confirm and set an error code. */
                    ompi_output(0, "mca_bmi_sm_add_procs: open(%s) failed with errno=%d\n", path, errno);
                    goto CLEANUP;
                }
#endif
                n_local_procs++;
                mca_bmi_sm_component.sm_proc_connect[proc]=SM_CONNECTED;
            }
        }
    }
    if( n_local_procs == 0) {
        return_code = OMPI_SUCCESS;
        goto CLEANUP;
    }

    /* lookup shared memory pool */
    if(NULL == mca_bmi_sm_component.sm_mpool) {
        mca_bmi_sm_component.sm_mpool =
            mca_mpool_base_module_lookup(mca_bmi_sm_component.sm_mpool_name);
        /* Sanity check to ensure that we found it */
        if (NULL == mca_bmi_sm_component.sm_mpool) {
            return_code = OMPI_ERR_OUT_OF_RESOURCE;
            goto CLEANUP;
        }
        mca_bmi_sm_component.sm_mpool_base =
            mca_bmi_sm_component.sm_mpool->mpool_base();
    }

    /* make sure that my_smp_rank has been defined
     * (0xFFFFFFFF is the "unset" sentinel for the uint32_t rank) */
    if( 0xFFFFFFFF == mca_bmi_sm_component.my_smp_rank ) {
        return_code=OMPI_ERROR;
        goto CLEANUP;
    }

    /* see if need to allocate space for extra procs */
    if( 0 > mca_bmi_sm_component.sm_max_procs ) {
        /* no limit configured yet */
        if( 0 <= mca_bmi_sm_component.sm_extra_procs ) {
            /* limit */
            mca_bmi_sm_component.sm_max_procs=n_local_procs+
                mca_bmi_sm_component.sm_extra_procs;
        } else {
            /* no limit */
            mca_bmi_sm_component.sm_max_procs=2*n_local_procs;
        }
    }
    n_to_allocate=mca_bmi_sm_component.sm_max_procs;

    /* make sure n_to_allocate is greater than 0 */

    /* one-time, per-process allocations (first call only) */
    if ( !mca_bmi_sm[0].bmi_inited ) {
        /* set the shared memory offset */
        mca_bmi_sm_component.sm_offset=(ssize_t *)
            malloc(n_to_allocate*sizeof(ssize_t));
        if(NULL == mca_bmi_sm_component.sm_offset ) {
            return_code=OMPI_ERR_OUT_OF_RESOURCE;
            goto CLEANUP;
        }
        /* create a list of peers */
        mca_bmi_sm_component.sm_peers=(struct mca_bmi_base_endpoint_t**)
            malloc(n_to_allocate*sizeof(struct mca_bmi_base_endpoint_t*));
        if(NULL == mca_bmi_sm_component.sm_peers ) {
            return_code=OMPI_ERR_OUT_OF_RESOURCE;
            goto CLEANUP;
        }
    }

    /* set local proc's smp rank in the peers structure for
     * rapid access */
    for( proc=0 ; proc < nprocs; proc++ ) {
        struct mca_bmi_base_endpoint_t* peer = peers[proc];
        if(NULL != peer) {
            mca_bmi_sm_component.sm_peers[peer->peer_smp_rank] = peer;
            peer->my_smp_rank=mca_bmi_sm_component.my_smp_rank;
        }
    }

    /* Allocate Shared Memory BMI process coordination
     * data structure.  This will reside in shared memory */

    /*
     * Create backing file - only first time through
     */
    if ( !mca_bmi_sm[0].bmi_inited ) {
        /* set file name */
        len=asprintf(&(mca_bmi_sm_component.sm_resouce_ctl_file),
            "%s/shared_mem_bmi_module.%s",orte_process_info.job_session_dir,
            orte_system_info.nodename);
        if( 0 > len ) {
            /* NOTE(review): return_code is still OMPI_SUCCESS here, so an
             * asprintf failure silently reports success — confirm and set
             * an error code before the goto. */
            goto CLEANUP;
        }
        size=sizeof(mca_bmi_sm_module_resource_t);
        /* mmap (or attach to) the node-wide control segment */
        if(NULL==(mca_bmi_sm_component.mmap_file=mca_common_sm_mmap_init(size,
                mca_bmi_sm_component.sm_resouce_ctl_file,
                sizeof(mca_bmi_sm_module_resource_t), 8 )))
        {
            /* NOTE(review): size is a size_t but the format uses %ld —
             * mismatched on some ABIs; %lu/%zu with a cast would be safer */
            ompi_output(0, "mca_bmi_sm_add_procs: unable to create shared memory BMI coordinating strucure :: size %ld \n",
                size);
            return_code=OMPI_ERROR;
            goto CLEANUP;
        }
        /* set the pointer to the shared memory control structure */
        mca_bmi_sm_component.sm_ctl_header=(mca_bmi_sm_module_resource_t *)
            mca_bmi_sm_component.mmap_file->map_seg;

        /* Allocate a fixed size pointer array for the 2-D Shared memory queues.
         * Excess slots will be allocated for future growth.  One could
         * make this array growable, but then one would need to use mutexes
         * for any access to these queues to ensure data consistency when
         * the array is grown */
        if(0 == mca_bmi_sm_component.my_smp_rank ) {
            /* allocate ompi_fifo_t structures for each fifo of the queue
             * pairs - one per pair of local processes */
            /* check to make sure number of local procs is within the
             * specified limits */
            if( ( 0 < mca_bmi_sm_component.sm_max_procs ) &&
                    ( n_local_procs > mca_bmi_sm_component.sm_max_procs) ) {
                return_code=OMPI_ERROR;
                goto CLEANUP;
            }
            /* allocate array of ompi_fifo_t* elements -
             * offset relative to base segment is stored, so that
             * this can be used by other procs */
            mca_bmi_sm_component.sm_ctl_header->fifo=
                mca_bmi_sm_component.sm_mpool->mpool_alloc
                (n_to_allocate*sizeof(ompi_fifo_t *),
                 CACHE_LINE_SIZE);
            if ( NULL == mca_bmi_sm_component.sm_ctl_header->fifo ) {
                return_code=OMPI_ERR_OUT_OF_RESOURCE;
                goto CLEANUP;
            }
            /* initialize the pointer array */
            for(i=0 ; i < n_to_allocate ; i++ ) {
                mca_bmi_sm_component.sm_ctl_header->fifo[i]=NULL;
            }

            /* allocate and initialize the array to hold the virtual address
             * of the shared memory base */
            mca_bmi_sm_component.sm_ctl_header->segment_header.
                base_shared_mem_segment = ( volatile char **)
                mca_bmi_sm_component.sm_mpool->mpool_alloc
                (n_to_allocate*sizeof(char *), CACHE_LINE_SIZE);
            if ( NULL == mca_bmi_sm_component.sm_ctl_header->segment_header.
                    base_shared_mem_segment ) {
                return_code=OMPI_ERR_OUT_OF_RESOURCE;
                goto CLEANUP;
            }
            /* initialize the pointer array */
            for(i=0 ; i < n_to_allocate ; i++ ) {
                mca_bmi_sm_component.sm_ctl_header->segment_header.
                    base_shared_mem_segment[i]=NULL;
            }

            /* allocate and initialize the array of flags indicating
             * when the virtual address of the shared memory address
             * has been set */
            mca_bmi_sm_component.sm_ctl_header->segment_header.
                base_shared_mem_flags = ( int *)
                mca_bmi_sm_component.sm_mpool->mpool_alloc
                (n_to_allocate*sizeof(int), CACHE_LINE_SIZE);
            if ( NULL == mca_bmi_sm_component.sm_ctl_header->segment_header.
                    base_shared_mem_flags ) {
                return_code=OMPI_ERR_OUT_OF_RESOURCE;
                goto CLEANUP;
            }
            for(i=0 ; i < n_to_allocate ; i++ ) {
                mca_bmi_sm_component.sm_ctl_header->segment_header.
                    base_shared_mem_flags[i]=0;
            }

            /* set the addresses to be relative, so that
             * they can be used by other procs */
            mca_bmi_sm_component.sm_ctl_header->fifo=
                (volatile ompi_fifo_t **)
                ( (char *)(mca_bmi_sm_component.sm_ctl_header->fifo)-
                  (char *)(mca_bmi_sm_component.sm_mpool->mpool_base()) );
            mca_bmi_sm_component.sm_ctl_header->segment_header.
                base_shared_mem_segment=( volatile char **)
                ( (char *)(mca_bmi_sm_component.sm_ctl_header->
                           segment_header.base_shared_mem_segment) -
                  (char *)(mca_bmi_sm_component.sm_mpool->mpool_base()) );

            /* allow other procs to use this shared memory map */
            mca_bmi_sm_component.mmap_file->map_seg->seg_inited=true;

            /* memory barrier to ensure this flag is set before other
             * flags are set */
            ompi_atomic_mb();
        }

        /* Note: Need to make sure that proc 0 initializes control
         * structures before any of the other procs can progress */
        if( 0 != mca_bmi_sm_component.my_smp_rank )
        {
            /* busy-wait until local proc 0 initializes the segment */
            while(!mca_bmi_sm_component.mmap_file->map_seg->seg_inited)
            { ; }
        }

        /* set the base of the shared memory segment, and flag
         * indicating that it is set */
        tmp_ptr=(volatile char **)
            ( (char *)(mca_bmi_sm_component.sm_ctl_header->segment_header.
                       base_shared_mem_segment) +
              (long )(mca_bmi_sm_component.sm_mpool->mpool_base()) );
        tmp_ptr[mca_bmi_sm_component.my_smp_rank]=
            mca_bmi_sm_component.sm_mpool->mpool_base();
        /* memory barrier to ensure this flag is set before other
         * flags are set */
        ompi_atomic_mb();
        mca_bmi_sm_component.sm_ctl_header->segment_header.
            base_shared_mem_flags[mca_bmi_sm_component.my_smp_rank]=1;

        /*
         * initialize the array of fifo's "owned" by this process
         * The virtual addresses are valid only in the sender's
         * address space - unless the base of the shared memory
         * segment is mapped at the same location in the reader's
         * virtual address space.
         */
        my_fifos=( ompi_fifo_t *)
            mca_bmi_sm_component.sm_mpool->mpool_alloc
            (n_to_allocate*sizeof(ompi_fifo_t), CACHE_LINE_SIZE);
        if ( NULL == my_fifos ) {
            return_code=OMPI_ERR_OUT_OF_RESOURCE;
            goto CLEANUP;
        }
        for( j=0 ; j < n_to_allocate ; j++ ) {
            my_fifos[j].head=OMPI_CB_FREE;
            my_fifos[j].tail=OMPI_CB_FREE;
            ompi_atomic_unlock(&(my_fifos[j].head_lock));
            ompi_atomic_unlock(&(my_fifos[j].tail_lock));
        }
        fifo_tmp=(ompi_fifo_t * volatile *)
            ( (char *)(mca_bmi_sm_component.sm_ctl_header->fifo) +
              (long)(mca_bmi_sm_component.sm_mpool->mpool_base()) );
        /* RLG : need memory barrier */
        fifo_tmp[mca_bmi_sm_component.my_smp_rank]=my_fifos;

        /* cache the pointer to the 2d fifo array.  These addresses
         * are valid in the current process space */
        mca_bmi_sm_component.fifo=(ompi_fifo_t **)
            malloc(sizeof(ompi_fifo_t *)*n_to_allocate);
        if( NULL == mca_bmi_sm_component.fifo ) {
            return_code=OMPI_ERROR;
            goto CLEANUP;
        }
        mca_bmi_sm_component.fifo[mca_bmi_sm_component.my_smp_rank]=my_fifos;
    }

    /* cache the pointers to the rest of the fifo arrays */
    fifo_tmp=(ompi_fifo_t * volatile *)
        ( (char *)(mca_bmi_sm_component.sm_ctl_header->fifo) +
          (long)(mca_bmi_sm_component.sm_mpool->mpool_base()) );
    for( j=mca_bmi_sm_component.num_smp_procs ; j <
            mca_bmi_sm_component.num_smp_procs+n_local_procs ; j++ ) {
        /* busy-wait until this element is allocated by its owner */
        while ( NULL == fifo_tmp[j] )
        { ; }
        tmp_ptr=(volatile char **)
            ( (char *)mca_bmi_sm_component.sm_ctl_header->
              segment_header.base_shared_mem_segment +
              (long)mca_bmi_sm_component.sm_mpool->mpool_base());
        /* translate the owner's virtual address into ours */
        diff= tmp_ptr[mca_bmi_sm_component.my_smp_rank]-tmp_ptr[j];
        mca_bmi_sm_component.fifo[j]=
            ( ompi_fifo_t *)( (char *)fifo_tmp[j]+diff);
        mca_bmi_sm_component.sm_offset[j]=tmp_ptr[j]-
            tmp_ptr[mca_bmi_sm_component.my_smp_rank];
    }

    /* initialize some of the free-lists */
    if( !mca_bmi_sm[0].bmi_inited ) {
        /* some initialization happens only the first time this routine
         * is called, i.e. when bmi_inited is false */

        /* initialize fragment descriptor free list */

        /*
         * first fragment
         */

        /* allocation will be for the fragment descriptor, payload buffer,
         * and padding to ensure proper alignment can be achieved */
        length=sizeof(mca_bmi_sm_frag_t)+
            mca_bmi_sm_component.fragment_alignment+
            mca_bmi_sm_component.first_fragment_size;
        ompi_free_list_init(&mca_bmi_sm_component.sm_frags1, length,
            OBJ_CLASS(mca_bmi_sm_frag1_t),
            mca_bmi_sm_component.sm_free_list_num,
            mca_bmi_sm_component.sm_free_list_max,
            mca_bmi_sm_component.sm_free_list_inc,
            mca_bmi_sm_component.sm_mpool); /* use shared-memory pool */

        length=sizeof(mca_bmi_sm_frag_t)+
            mca_bmi_sm_component.fragment_alignment+
            mca_bmi_sm_component.max_fragment_size;
        ompi_free_list_init(&mca_bmi_sm_component.sm_frags2, length,
            OBJ_CLASS(mca_bmi_sm_frag2_t),
            mca_bmi_sm_component.sm_free_list_num,
            mca_bmi_sm_component.sm_free_list_max,
            mca_bmi_sm_component.sm_free_list_inc,
            mca_bmi_sm_component.sm_mpool); /* use shared-memory pool */

        /* set up mca_bmi_sm_component.list_smp_procs_same_base_addr */
        mca_bmi_sm_component.list_smp_procs_same_base_addr=(int *)
            malloc(mca_bmi_sm_component.sm_max_procs*sizeof(int));
        if( NULL == mca_bmi_sm_component.list_smp_procs_same_base_addr ){
            return_code=OMPI_ERR_OUT_OF_RESOURCE;
            goto CLEANUP;
        }

        /* set up mca_bmi_sm_component.list_smp_procs_different_base_addr */
        mca_bmi_sm_component.list_smp_procs_different_base_addr=(int *)
            malloc(mca_bmi_sm_component.sm_max_procs*sizeof(int));
        if( NULL == mca_bmi_sm_component.list_smp_procs_different_base_addr ){
            return_code=OMPI_ERR_OUT_OF_RESOURCE;
            goto CLEANUP;
        }

        /* set flag indicating bmi has been inited */
        bmi_sm->bmi_inited=true;
    }

    /* set connectivity: classify each local peer by whether its segment
     * base address matches ours, and mark same-base peers reachable */
    cnt=0;
    for(proc = 0 ; proc < nprocs ; proc++ ) {
        struct mca_bmi_base_endpoint_t* peer = peers[proc];
        if(peer == NULL)
            continue;
        tmp_ptr=(volatile char **)
            ( (char *)mca_bmi_sm_component.sm_ctl_header->
              segment_header.base_shared_mem_segment +
              (long)mca_bmi_sm_component.sm_mpool->mpool_base());
        same_sm_base=(tmp_ptr[peer->peer_smp_rank] ==
            tmp_ptr[mca_bmi_sm_component.my_smp_rank]);
        if( SM_CONNECTED == mca_bmi_sm_component.sm_proc_connect[proc] ) {
            if( same_sm_base ){
                /* don't count if same process */
                if( (mca_bmi_sm_component.num_smp_procs+cnt ) ==
                        mca_bmi_sm_component.my_smp_rank) {
                    cnt++;
                    continue;
                }
                /* set up the list of local processes with the same base
                 * shared memory virtual address as this process */
                mca_bmi_sm_component.list_smp_procs_same_base_addr
                    [mca_bmi_sm_component.num_smp_procs_same_base_addr]=
                    cnt;
                mca_bmi_sm_component.num_smp_procs_same_base_addr++;
                cnt++;
                /* add this proc to shared memory accessibility list */
                return_code=ompi_bitmap_set_bit(reachability,proc);
                if( OMPI_SUCCESS != return_code ){
                    goto CLEANUP;
                }
            } else {
                /* set up the list of local processes with a different base
                 * shared memory virtual address than this process */
                mca_bmi_sm_component.list_smp_procs_different_base_addr
                    [mca_bmi_sm_component.num_smp_procs_different_base_addr]=
                    cnt;
                mca_bmi_sm_component.num_smp_procs_different_base_addr++;
                cnt++;
                mca_bmi_sm_component.sm_proc_connect[proc]=
                    SM_CONNECTED_DIFFERENT_BASE_ADDR;
            }
        }
    }

    /* update the local smp process count */
    mca_bmi_sm_component.num_smp_procs+=n_local_procs;

CLEANUP:
    /* free local memory */
    if(sm_proc_info){
        /* free the memory allocated by mca_base_modex_recv */
        for( proc=0 ; proc < nprocs; proc++ ) {
            if(sm_proc_info[proc]){
                free(sm_proc_info[proc]);
                sm_proc_info[proc]=NULL;
            }
        }
        free(sm_proc_info);
        sm_proc_info=NULL;
    }

    return return_code;
}
/* Note: this routine assumes that mca_bmi_sm_add_procs_same_base_addr
 * has already been called to set up data structures needed by this
 * routine */
/**
 * Second pass of peer setup, run on the second module instance
 * (mca_bmi_sm[1]).  Creates endpoints and marks reachable those local
 * peers whose shared-memory segment is mapped at a DIFFERENT base
 * virtual address, based on the sm_proc_connect[] classification done
 * by the first pass.  Frees sm_proc_connect[] when done.
 *
 * @param bmi          (IN)  BMI module
 * @param nprocs       (IN)  number of procs
 * @param procs        (IN)  procs to connect to
 * @param peers        (OUT) per-proc endpoint, NULL where not reachable
 * @param reachability (OUT) bitmap of reachable procs
 * @return OMPI_SUCCESS or error status on failure.
 */
int mca_bmi_sm_add_procs(
    struct mca_bmi_base_module_t* bmi,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_bmi_base_endpoint_t **peers,
    ompi_bitmap_t* reachability)
{
    int return_code = OMPI_SUCCESS, tmp_cnt;
    uint32_t proc, n_local_procs;

    /* initialization */
    for(proc=0 ; proc < nprocs ; proc++ ) {
        peers[proc]=NULL;
    }

    /* figure out total number of local procs in current set */
    tmp_cnt=0;
    for(proc = 0 ; proc < nprocs ; proc++ ) {
        if( (SM_CONNECTED_DIFFERENT_BASE_ADDR ==
                    mca_bmi_sm_component.sm_proc_connect[proc]) ||
                (SM_CONNECTED ==
                 mca_bmi_sm_component.sm_proc_connect[proc]) ) {
            tmp_cnt++;
        }
    }

    /* set connectivity */
    n_local_procs=0;
    for(proc = 0 ; proc < nprocs ; proc++ ) {
        if( (SM_CONNECTED_DIFFERENT_BASE_ADDR ==
                    mca_bmi_sm_component.sm_proc_connect[proc]) ||
                (SM_CONNECTED ==
                 mca_bmi_sm_component.sm_proc_connect[proc]) ) {
            n_local_procs++;
        }
        if( (SM_CONNECTED_DIFFERENT_BASE_ADDR ==
                    mca_bmi_sm_component.sm_proc_connect[proc]) ) {
            /* add this proc to shared memory accessibility list */
            return_code=ompi_bitmap_set_bit(reachability,proc);
            if( OMPI_SUCCESS != return_code ){
                goto CLEANUP;
            }
            /* initialize the peers information */
            peers[proc]=malloc(sizeof(struct mca_bmi_base_endpoint_t));
            if( NULL == peers[proc] ){
                return_code=OMPI_ERR_OUT_OF_RESOURCE;
                goto CLEANUP;
            }
            peers[proc]->my_smp_rank=mca_bmi_sm_component.my_smp_rank;
            /* subtract tmp_cnt, since mca_bmi_sm_add_procs_same_base_addr
             * already added these into num_smp_procs */
            peers[proc]->peer_smp_rank=n_local_procs+
                mca_bmi_sm_component.num_smp_procs-tmp_cnt;
            /* NOTE(review): n_local_procs was already incremented above for
             * this same proc (both branches match DIFFERENT_BASE_ADDR), so
             * it advances by 2 per different-base peer — verify this is the
             * intended rank arithmetic and not a double count. */
            n_local_procs++;
        }
    }

CLEANUP:
    /* free local memory */
    if(mca_bmi_sm_component.sm_proc_connect){
        free(mca_bmi_sm_component.sm_proc_connect);
        mca_bmi_sm_component.sm_proc_connect=NULL;
    }

    return return_code;
}
/**
 * PML->BMI notification that procs are going away.  The sm BMI holds no
 * per-peer resources that must be torn down here, so this is a no-op.
 */
int mca_bmi_sm_del_procs(
    struct mca_bmi_base_module_t* bmi,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_bmi_base_endpoint_t **peers)
{
    /* silence unused-parameter warnings; nothing to release */
    (void)bmi;
    (void)nprocs;
    (void)procs;
    (void)peers;
    return OMPI_SUCCESS;
}
/**
 * MCA->BMI: release any resources held by this BMI module prior to
 * unload.  The sm BMI currently frees nothing here and simply reports
 * success.
 *
 * @param bmi (IN) BMI module being finalized.
 * @return OMPI_SUCCESS always.
 */
int mca_bmi_sm_finalize(struct mca_bmi_base_module_t* bmi)
{
    (void)bmi; /* unused */
    return OMPI_SUCCESS;
}
/**
 * Register a callback function to be invoked on receipt of a fragment
 * carrying the given tag.  The callback and its opaque user data are
 * stored in this module's per-tag table (sm_reg).
 *
 * @param bmi    (IN) BMI module
 * @param tag    (IN) tag used to index the callback table
 * @param cbfunc (IN) function to invoke on receipt
 * @param cbdata (IN) opaque data passed back to cbfunc
 * @return OMPI_SUCCESS always.
 *
 * NOTE(review): tag is used unchecked as an index into the 256-entry
 * sm_reg[] array — presumably mca_bmi_base_tag_t is an 8-bit type;
 * confirm, otherwise add a bounds check.
 */
int mca_bmi_sm_register(
    struct mca_bmi_base_module_t* bmi,
    mca_bmi_base_tag_t tag,
    mca_bmi_base_module_recv_cb_fn_t cbfunc,
    void* cbdata)
{
    mca_bmi_sm_t* sm_bmi = (mca_bmi_sm_t*)bmi;
    sm_bmi->sm_reg[tag].cbfunc = cbfunc;
    sm_bmi->sm_reg[tag].cbdata = cbdata;
    return OMPI_SUCCESS;
}
/**
 * Allocate a fragment descriptor/segment.  Small requests come from the
 * first-fragment free list, larger ones from the second free list.
 *
 * @param bmi  (IN) BMI module
 * @param size (IN) requested segment size
 * @return the allocated descriptor, or NULL-like on exhaustion.
 *
 * NOTE(review): rc filled in by the ALLOC macros is never inspected and
 * frag is returned unconditionally — presumably frag is NULL on failure
 * and callers check for that; confirm against the macro definitions in
 * bmi_sm_frag.h.
 */
extern mca_bmi_base_descriptor_t* mca_bmi_sm_alloc(
    struct mca_bmi_base_module_t* bmi,
    size_t size)
{
    mca_bmi_sm_frag_t* frag;
    int rc;
    /* pick the free list by requested payload size */
    if(size <= mca_bmi_sm_component.first_fragment_size) {
        MCA_BMI_SM_FRAG_ALLOC1(frag,rc);
    } else {
        MCA_BMI_SM_FRAG_ALLOC2(frag,rc);
    }
    return (mca_bmi_base_descriptor_t*)frag;
}
/**
 * Return a fragment previously allocated by mca_bmi_sm_alloc() to the
 * free list it came from, chosen by the fragment's recorded size.
 *
 * @param bmi (IN) BMI module
 * @param des (IN) descriptor being returned
 * @return OMPI_SUCCESS always.
 */
extern int mca_bmi_sm_free(
    struct mca_bmi_base_module_t* bmi,
    mca_bmi_base_descriptor_t* des)
{
    mca_bmi_sm_frag_t* sm_frag = (mca_bmi_sm_frag_t*)des;
    if(sm_frag->size > mca_bmi_sm_component.first_fragment_size) {
        MCA_BMI_SM_FRAG_RETURN2(des);
    } else {
        MCA_BMI_SM_FRAG_RETURN1(des);
    }
    return OMPI_SUCCESS;
}
/**
 * Pack data for transmission.  Not implemented for the shared-memory
 * BMI; always yields no descriptor.
 *
 * @param bmi       (IN) BMI module
 * @param peer      (IN) BMI peer addressing
 * @param convertor (IN) datatype convertor (unused)
 * @param reserve   (IN) bytes to reserve (unused)
 * @param size      (IN/OUT) requested/actual size (unused)
 * @return NULL (unimplemented).
 */
struct mca_bmi_base_descriptor_t* mca_bmi_sm_pack(
    struct mca_bmi_base_module_t* bmi,
    struct mca_bmi_base_endpoint_t* peer,
    struct ompi_convertor_t* convertor,
    size_t reserve,
    size_t* size)
{
    (void)bmi;
    (void)peer;
    (void)convertor;
    (void)reserve;
    (void)size;
    return NULL;
}
/**
 * Initiate a send to a local peer by posting the fragment on the
 * peer's fifo.  The fifo write applies the relative-address
 * translation needed for readers at a different segment base.
 *
 * @param bmi        (IN) BMI module
 * @param endpoint   (IN) addressed peer
 * @param descriptor (IN) fragment to send
 * @param tag        (IN) receive-callback tag stamped on the fragment
 * @return status from the fifo write.
 */
int mca_bmi_sm_send(
    struct mca_bmi_base_module_t* bmi,
    struct mca_bmi_base_endpoint_t* endpoint,
    struct mca_bmi_base_descriptor_t* descriptor,
    mca_bmi_base_tag_t tag)
{
    int status;
    mca_bmi_sm_frag_t* sendfrag = (mca_bmi_sm_frag_t*)descriptor;

    /* stamp the fragment before exposing it to the receiver */
    sendfrag->type = MCA_BMI_SM_FRAG_SEND;
    sendfrag->rc = OMPI_SUCCESS;
    sendfrag->tag = tag;

    /* post the descriptor in the peer's queue (relative address) */
    MCA_BMI_SM_FIFO_WRITE(endpoint->my_smp_rank,endpoint->peer_smp_rank, sendfrag, status);
    return status;
}

355
src/mca/bmi/sm/bmi_sm.h Обычный файл
Просмотреть файл

@ -0,0 +1,355 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_BMI_SM_H
#define MCA_BMI_SM_H
#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include "class/ompi_free_list.h"
#include "class/ompi_bitmap.h"
#include "class/ompi_fifo.h"
#include "event/event.h"
#include "mca/pml/pml.h"
#include "mca/bmi/bmi.h"
#include "mca/mpool/mpool.h"
#include "mca/common/sm/common_sm_mmap.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
 * Shared Memory resource management: node-wide control structure that
 * lives at the start of the mmap'ed coordination file.
 */
struct mca_bmi_sm_module_resource_t {
    /* base control structures */
    mca_common_sm_file_header_t segment_header;

    /* fifo queues - offsets relative to the base of the shared memory
     * segment will be stored here */
    volatile ompi_fifo_t **fifo;
};
typedef struct mca_bmi_sm_module_resource_t mca_bmi_sm_module_resource_t;
extern mca_bmi_sm_module_resource_t mca_bmi_sm_module_resource;

/* connectivity states stored per-proc in sm_proc_connect[] */
#define SM_CONNECTED 1
#define SM_CONNECTED_SAME_BASE_ADDR 2
#define SM_CONNECTED_DIFFERENT_BASE_ADDR 3

#if OMPI_ENABLE_PROGRESS_THREADS == 1
/* single-byte commands written down the wake-up fifo */
#define DATA (char)0
#define DONE (char)1
#endif

/* per-tag receive callback registration */
struct mca_bmi_sm_registration_t {
    mca_bmi_base_module_recv_cb_fn_t cbfunc; /* invoked on fragment receipt */
    void *cbdata;                            /* opaque user data for cbfunc */
};
typedef struct mca_bmi_sm_registration_t mca_bmi_sm_registration_t;
/**
 * Shared Memory (SM) BMI component state (one instance per process).
 */
struct mca_bmi_sm_component_t {
    mca_bmi_base_component_1_0_0_t super;   /**< base BMI component */
    int sm_free_list_num;                   /**< initial size of free lists */
    int sm_free_list_max;                   /**< maximum size of free lists */
    int sm_free_list_inc;                   /**< number of elements to alloc when growing free lists */
    int sm_max_procs;                       /**< upper limit on the number of processes using the shared memory pool */
    int sm_extra_procs;                     /**< number of extra procs to allow */
    char* sm_mpool_name;                    /**< name of shared memory pool module */
    mca_mpool_base_module_t* sm_mpool;      /**< shared memory pool */
    void* sm_mpool_base;                    /**< base address of shared memory pool */
    size_t first_fragment_size;             /**< first fragment size */
    size_t max_fragment_size;               /**< maximum (second and
                                                 beyond) fragment size */
    size_t fragment_alignment;              /**< fragment alignment */
    ompi_mutex_t sm_lock;                   /**< lock protecting component state */
    char* sm_resouce_ctl_file;              /**< name of shared memory file used
                                                 to coordinate resource usage
                                                 (field-name typo is part of the API) */
    mca_common_sm_mmap_t *mmap_file;        /**< description of mmap'ed file */
    mca_bmi_sm_module_resource_t *sm_ctl_header; /* control header in
                                                    shared memory */
    ompi_fifo_t **fifo;                     /**< cached copy of the pointer to the 2D
                                                 fifo array.  The address in the shared
                                                 memory segment sm_ctl_header is relative,
                                                 but this one, in process private memory, is
                                                 a real virtual address */
    size_t size_of_cb_queue;                /**< size of each circular buffer queue array */
    size_t cb_lazy_free_freq;               /**< frequency of lazy free */
    ssize_t *sm_offset;                     /**< offset to be applied to shared memory
                                                 addresses, per local process value */
    int *sm_proc_connect;                   /* scratch array used by the 0'th bmi to
                                             * indicate sm connectivity.  Used by
                                             * the 1'st bmi */
    size_t num_smp_procs;                   /**< current number of smp procs on this
                                                 host */
    int num_smp_procs_same_base_addr;       /* number of procs with same
                                               base shared memory virtual
                                               address as this process */
    int num_smp_procs_different_base_addr;  /* number of procs with
                                               different base shared memory
                                               virtual address than this
                                               process */
    int *list_smp_procs_same_base_addr;     /* list of local ranks with the same
                                               base shared memory virtual
                                               address as this process */
    int *list_smp_procs_different_base_addr; /* list of local ranks with a different
                                                base shared memory virtual
                                                address than this process */
    uint32_t my_smp_rank;                   /**< My SMP process rank.  Used for accessing
                                             *   SMP specific data structures. */
    ompi_free_list_t sm_frags1;             /**< free list of sm first-fragment descriptors */
    ompi_free_list_t sm_frags2;             /**< free list of sm second/large fragments */
    ompi_free_list_t sm_first_frags_to_progress; /**< list of first
                                                      fragments that are
                                                      awaiting resources */
    struct mca_bmi_base_endpoint_t **sm_peers; /**< local endpoints indexed by smp rank */
#if OMPI_ENABLE_PROGRESS_THREADS == 1
    char sm_fifo_path[PATH_MAX];            /**< path to fifo used to signal this process */
    int sm_fifo_fd;                         /**< file descriptor corresponding to opened fifo */
    ompi_thread_t sm_fifo_thread;           /**< progress thread servicing the fifo */
#endif
};
typedef struct mca_bmi_sm_component_t mca_bmi_sm_component_t;
extern mca_bmi_sm_component_t mca_bmi_sm_component;
/**
 * Register shared memory component parameters with the MCA framework.
 */
extern int mca_bmi_sm_component_open(void);

/**
 * Any final cleanup before the component is unloaded.
 */
extern int mca_bmi_sm_component_close(void);

/**
 * SM component initialization: returns the array of sm BMI modules.
 *
 * @param num_bmis (OUT) Number of BMIs returned in BMI array.
 * @param enable_progress_threads (IN) Flag indicating whether BMI is allowed to have progress threads
 * @param enable_mpi_threads (IN) Flag indicating whether BMI must support multiple simultaneous invocations from different threads
 */
extern mca_bmi_base_module_t** mca_bmi_sm_component_init(
    int *num_bmis,
    bool enable_progress_threads,
    bool enable_mpi_threads
);

/**
 * Shared memory component progress: drain incoming fifos.
 */
extern int mca_bmi_sm_component_progress(void);
/**
 * SM BMI module: base interface plus sm-specific state.
 */
struct mca_bmi_sm_t {
    mca_bmi_base_module_t super;       /**< base BMI interface */
    bool bmi_inited;                   /**< flag indicating if bmi has been inited */
    mca_bmi_sm_registration_t sm_reg[256]; /**< receive callbacks, indexed by tag */
};
typedef struct mca_bmi_sm_t mca_bmi_sm_t;

/* [0] = same-base-address module, [1] = different-base-address module */
extern mca_bmi_sm_t mca_bmi_sm[2];
/**
 * Cleanup any resources held by the BMI.
 *
 * @param bmi BMI instance.
 * @return OMPI_SUCCESS or error status on failure.
 */
extern int mca_bmi_sm_finalize(
    struct mca_bmi_base_module_t* bmi
);

/**
 * PML->BMI notification of change in the process list.
 * Second-pass setup: connects local peers whose shared memory segment
 * is mapped at a DIFFERENT base virtual address than this process.
 * Must run after mca_bmi_sm_add_procs_same_base_addr().
 *
 * @param bmi          (IN)  BMI instance
 * @param nprocs       (IN)  number of procs
 * @param procs        (IN)  processes to connect to
 * @param peers        (OUT) per-proc endpoints
 * @param reachability (OUT) bitmap of reachable procs
 * @return OMPI_SUCCESS or error status on failure.
 */
extern int mca_bmi_sm_add_procs(
    struct mca_bmi_base_module_t* bmi,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_bmi_base_endpoint_t** peers,
    struct ompi_bitmap_t* reachability
);

/**
 * PML->BMI notification of change in the process list.
 * First-pass setup: creates the shared memory backing file and fifos,
 * and connects local peers whose shared memory segment is mapped at the
 * SAME base virtual address as this process.
 *
 * @param bmi          (IN)  BMI instance
 * @param nprocs       (IN)  number of procs
 * @param procs        (IN)  processes to connect to
 * @param peers        (OUT) per-proc endpoints
 * @param reachability (OUT) bitmap of reachable procs
 * @return OMPI_SUCCESS or error status on failure.
 */
extern int mca_bmi_sm_add_procs_same_base_addr(
    struct mca_bmi_base_module_t* bmi,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_bmi_base_endpoint_t** peers,
    ompi_bitmap_t* reachability
);

/**
 * PML->BMI notification of change in the process list: release any
 * per-peer resources.
 *
 * @param bmi  (IN) BMI instance
 * @param nprocs (IN) number of procs
 * @param procs (IN) peer processes
 * @param peers (IN) peer addressing information
 * @return Status indicating if cleanup was successful
 */
extern int mca_bmi_sm_del_procs(
    struct mca_bmi_base_module_t* bmi,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_bmi_base_endpoint_t **peers
);

/**
 * Register a callback function that is called on receipt
 * of a fragment carrying the given tag.
 *
 * @param bmi    (IN) BMI module
 * @param tag    (IN) tag indexing the callback table
 * @param cbfunc (IN) callback to invoke
 * @param cbdata (IN) opaque user data for cbfunc
 * @return Status indicating if registration was successful
 */
extern int mca_bmi_sm_register(
    struct mca_bmi_base_module_t* bmi,
    mca_bmi_base_tag_t tag,
    mca_bmi_base_module_recv_cb_fn_t cbfunc,
    void* cbdata
);

/**
 * Allocate a segment.
 *
 * @param bmi  (IN) BMI module
 * @param size (IN) Request segment size.
 */
extern mca_bmi_base_descriptor_t* mca_bmi_sm_alloc(
    struct mca_bmi_base_module_t* bmi,
    size_t size
);

/**
 * Return a segment allocated by this BMI.
 *
 * @param bmi     (IN) BMI module
 * @param segment (IN) Allocated segment.
 */
extern int mca_bmi_sm_free(
    struct mca_bmi_base_module_t* bmi,
    mca_bmi_base_descriptor_t* segment
);

/**
 * Pack data (currently unimplemented for sm; returns NULL).
 *
 * @param bmi  (IN) BMI module
 * @param peer (IN) BMI peer addressing
 */
struct mca_bmi_base_descriptor_t* mca_bmi_sm_pack(
    struct mca_bmi_base_module_t* bmi,
    struct mca_bmi_base_endpoint_t* peer,
    struct ompi_convertor_t* convertor,
    size_t reserve,
    size_t* size
);

/**
 * Initiate a send to the peer by posting the descriptor on its fifo.
 *
 * @param bmi      (IN) BMI module
 * @param endpoint (IN) BMI peer addressing
 * @param descriptor (IN) fragment to send
 * @param tag      (IN) receive-callback tag
 */
extern int mca_bmi_sm_send(
    struct mca_bmi_base_module_t* bmi,
    struct mca_bmi_base_endpoint_t* endpoint,
    struct mca_bmi_base_descriptor_t* descriptor,
    mca_bmi_base_tag_t tag
);
/**
 * Data structure used to hold information that will be exchanged with
 * all other procs at startup (the local hostname, used to detect
 * same-host peers).  !!!!! This is only temporary, until the
 * registry is complete.
 */
#define MCA_BMI_SM_MAX_HOSTNAME_LEN 128
typedef struct mca_bmi_sm_exchange{
    char host_name[MCA_BMI_SM_MAX_HOSTNAME_LEN];
}mca_bmi_sm_exchange_t;

#if OMPI_ENABLE_PROGRESS_THREADS == 1
/* body of the progress thread that services this process's wake-up fifo */
void mca_bmi_sm_component_event_thread(ompi_object_t*);
#endif
#if OMPI_ENABLE_PROGRESS_THREADS == 1
/* Wake the peer's progress thread by writing a single DATA byte to its
 * named fifo.  Wrapped in do { } while (0) so the macro expands to a
 * single statement and is safe in unbraced if/else bodies (the original
 * bare-brace form would break "if (x) MCA_BMI_SM_SIGNAL_PEER(p); else ..."). */
#define MCA_BMI_SM_SIGNAL_PEER(peer) \
do { \
    unsigned char cmd = DATA; \
    if(write(peer->fifo_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) { \
        ompi_output(0, "mca_bmi_sm_send: write fifo failed: errno=%d\n", errno); \
    } \
} while (0)
#else
#define MCA_BMI_SM_SIGNAL_PEER(peer)
#endif
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

491
src/mca/bmi/sm/bmi_sm_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,491 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h> /* for mkfifo */
#include "include/constants.h"
#include "include/sys/cache.h"
#include "event/event.h"
#include "util/if.h"
#include "util/argv.h"
#include "util/output.h"
#include "util/sys_info.h"
#include "util/proc_info.h"
#include "mca/pml/pml.h"
#include "mca/base/mca_base_param.h"
#include "mca/base/mca_base_module_exchange.h"
#include "mca/mpool/base/base.h"
#include "mca/common/sm/common_sm_mmap.h"
#include "bmi_sm.h"
#include "bmi_sm_frag.h"
#include "bmi_sm_fifo.h"
/*
* Local utility functions.
*/
static int mca_bmi_sm_component_exchange(void);
/*
* Shared Memory (SM) component instance.
*/
/*
 * Shared Memory (SM) component instance: static initialization of the
 * component's function table and meta data.
 */
mca_bmi_sm_component_t mca_bmi_sm_component = {
    {  /* super is being filled in */
        /* First, the mca_base_component_t struct containing meta information
           about the component itself */
        {
            /* Indicate that we are a bmi v1.0.0 component (which also implies a
               specific MCA version) */
            MCA_BMI_BASE_VERSION_1_0_0,
            "sm", /* MCA component name */
            1,  /* MCA component major version */
            0,  /* MCA component minor version */
            0,  /* MCA component release version */
            mca_bmi_sm_component_open,  /* component open */
            mca_bmi_sm_component_close  /* component close */
        },
        /* Next the MCA v1.0.0 component meta data */
        {
            /* Whether the component is checkpointable or not */
            false
        },
        mca_bmi_sm_component_init,      /* component init: returns the sm BMI modules */
        mca_bmi_sm_component_progress,  /* component progress: drains receive fifos */
    }  /* end super */
};
/*
* utility routines for parameter registration
*/
/*
 * Register (and look up) a string MCA parameter for the sm BMI.
 * BUG FIX: the parameter was registered under the "ptl" framework
 * (copy/paste from the old ptl/sm component); this component lives in
 * the "bmi" framework, so register it there to get "bmi_sm_*" names.
 */
static inline char* mca_bmi_sm_param_register_string(
    const char* param_name,
    const char* default_value)
{
    char *param_value;
    int id = mca_base_param_register_string("bmi","sm",param_name,NULL,default_value);
    mca_base_param_lookup_string(id, &param_value);
    return param_value;
}
/*
 * Register (and look up) an integer MCA parameter for the sm BMI.
 * BUG FIX: registered under "bmi" rather than the leftover "ptl"
 * framework name, matching this component's actual framework.
 */
static inline int mca_bmi_sm_param_register_int(
    const char* param_name,
    int default_value)
{
    int id = mca_base_param_register_int("bmi","sm",param_name,NULL,default_value);
    int param_value = default_value;
    mca_base_param_lookup_int(id,&param_value);
    return param_value;
}
/*
* Called by MCA framework to open the component, registers
* component parameters.
*/
/*
 * Called by the MCA framework to open the component.  Registers all of
 * the sm component's MCA parameters, sanity-checks the circular-buffer
 * queue settings, and constructs the component-level objects.
 *
 * @return OMPI_SUCCESS
 */
int mca_bmi_sm_component_open(void)
{
    /* register SM component parameters */
    mca_bmi_sm_component.sm_free_list_num =
        mca_bmi_sm_param_register_int("free_list_num", 256);
    mca_bmi_sm_component.sm_free_list_max =
        mca_bmi_sm_param_register_int("free_list_max", -1);
    mca_bmi_sm_component.sm_free_list_inc =
        mca_bmi_sm_param_register_int("free_list_inc", 256);
    mca_bmi_sm_component.sm_max_procs =
        mca_bmi_sm_param_register_int("max_procs", -1);
    mca_bmi_sm_component.sm_mpool_name =
        mca_bmi_sm_param_register_string("mpool", "sm");
    mca_bmi_sm_component.first_fragment_size =
        mca_bmi_sm_param_register_int("first_fragment_size", 1024);
    mca_bmi_sm_component.max_fragment_size =
        mca_bmi_sm_param_register_int("max_fragment_size", 8*1024);
    mca_bmi_sm_component.fragment_alignment =
        mca_bmi_sm_param_register_int("fragment_alignment",
                                      CACHE_LINE_SIZE);
    mca_bmi_sm_component.size_of_cb_queue =
        mca_bmi_sm_param_register_int("size_of_cb_queue", 128);
    mca_bmi_sm_component.cb_lazy_free_freq =
        mca_bmi_sm_param_register_int("cb_lazy_free_freq", 120);

    /* make sure that queue size and lazy free frequency are consistent -
     * want to make sure that slots are freed at a rate they can be
     * reused, w/o allocating extra new circular buffer fifo arrays */
    if( (float)(mca_bmi_sm_component.cb_lazy_free_freq) >=
            0.95*(float)(mca_bmi_sm_component.size_of_cb_queue) ) {
        /* upper limit */
        mca_bmi_sm_component.cb_lazy_free_freq=
            (int)(0.95*(float)(mca_bmi_sm_component.size_of_cb_queue));
        /* lower limit */
        if( 0 >= mca_bmi_sm_component.cb_lazy_free_freq ) {
            mca_bmi_sm_component.cb_lazy_free_freq=1;
        }
    }

    /* default number of extra procs to allow for future growth.
     * BUG FIX: "sm_extra_procs" was previously registered twice - first
     * with a default of -1, then here with 2.  Since the first
     * registration establishes the default, the documented default of 2
     * never took effect; the duplicate registration has been removed. */
    mca_bmi_sm_component.sm_extra_procs =
        mca_bmi_sm_param_register_int("sm_extra_procs", 2);

    /* initialize objects */
    OBJ_CONSTRUCT(&mca_bmi_sm_component.sm_lock, ompi_mutex_t);
    OBJ_CONSTRUCT(&mca_bmi_sm_component.sm_frags1, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_bmi_sm_component.sm_frags2, ompi_free_list_t);
    return OMPI_SUCCESS;
}
/*
* component cleanup - sanity checking of queue lengths
*/
/*
 * Component cleanup: destroy the component-level objects, unmap and
 * unlink the shared memory control file, and (progress-threads builds
 * only) shut down the fifo event thread.
 *
 * @return OMPI_SUCCESS, or OMPI_ERROR if munmap failed
 */
int mca_bmi_sm_component_close(void)
{
    int return_value=OMPI_SUCCESS;

    OBJ_DESTRUCT(&mca_bmi_sm_component.sm_lock);
    OBJ_DESTRUCT(&mca_bmi_sm_component.sm_frags1);
    OBJ_DESTRUCT(&mca_bmi_sm_component.sm_frags2);

    /* unmap the shared memory control structure */
    if(mca_bmi_sm_component.mmap_file != NULL) {
        return_value=munmap(mca_bmi_sm_component.mmap_file->map_addr,
                mca_bmi_sm_component.mmap_file->map_size);
        if(-1 == return_value) {
            return_value=OMPI_ERROR;
            /* BUG FIX: pass the file's path to the %s specifier; the old
             * code passed map_addr (the mapping's base address), which is
             * not a NUL-terminated string */
            ompi_output(0," munmap failed :: file - %s :: errno - %d \n",
                    mca_bmi_sm_component.mmap_file->map_path,
                    errno);
            goto CLEANUP;
        }

        /* unlink file, so that it will be deleted when all references
         * to it are gone - no error checking, since we want all procs
         * to call this, so that in an abnormal termination scenario,
         * this file will still get cleaned up */
        unlink(mca_bmi_sm_component.mmap_file->map_path);
    }

#if OMPI_ENABLE_PROGRESS_THREADS == 1
    /* close/cleanup fifo create for event notification */
    if(mca_bmi_sm_component.sm_fifo_fd > 0) {
        /* write a done message down the pipe so the event thread exits */
        unsigned char cmd = DONE;
        if( write(mca_bmi_sm_component.sm_fifo_fd,&cmd,sizeof(cmd)) !=
                sizeof(cmd)){
            ompi_output(0, "mca_bmi_sm_component_close: write fifo failed: errno=%d\n",
                    errno);
        }
        ompi_thread_join(&mca_bmi_sm_component.sm_fifo_thread, NULL);
        close(mca_bmi_sm_component.sm_fifo_fd);
        unlink(mca_bmi_sm_component.sm_fifo_path);
    }
#endif

CLEANUP:
    /* return */
    return return_value;
}
/*
* SM component initialization
*/
/*
 * SM component initialization.  Publishes this proc's shared memory
 * setup information via the modex, optionally creates the named pipe
 * and event thread (progress-threads builds only), and returns an
 * array of two sm BMI modules.
 *
 * @param num_ptls (OUT)               set to 2 on success, 0 otherwise
 * @param enable_progress_threads (IN) unused here; the event thread is
 *                                     gated on OMPI_ENABLE_PROGRESS_THREADS
 * @param enable_mpi_threads (IN)      unused
 * @return malloc'd array of module pointers, or NULL on any failure
 */
mca_bmi_base_module_t** mca_bmi_sm_component_init(
    int *num_ptls,
    bool enable_progress_threads,
    bool enable_mpi_threads)
{
    mca_bmi_base_module_t **ptls = NULL;
    int i;

    *num_ptls = 0;

    /* lookup/create shared memory pool only when used */
    mca_bmi_sm_component.sm_mpool = NULL;
    mca_bmi_sm_component.sm_mpool_base = NULL;

    /* publish shared memory parameters with the MCA framework */
    if (OMPI_SUCCESS != mca_bmi_sm_component_exchange()) {
        return NULL;
    }

#if OMPI_ENABLE_PROGRESS_THREADS == 1
    /* create a named pipe to receive events; the path is unique per
     * job/vpid under the job session directory */
    sprintf(mca_bmi_sm_component.sm_fifo_path,
        "%s/sm_fifo.%d", orte_process_info.job_session_dir,
        orte_process_info.my_name->vpid);
    if(mkfifo(mca_bmi_sm_component.sm_fifo_path, 0660) < 0) {
        ompi_output(0, "mca_bmi_sm_component_init: mkfifo failed with errno=%d\n",errno);
        return NULL;
    }
    /* O_RDWR so the open does not block waiting for a writer */
    mca_bmi_sm_component.sm_fifo_fd = open(mca_bmi_sm_component.sm_fifo_path, O_RDWR);
    if(mca_bmi_sm_component.sm_fifo_fd < 0) {
        ompi_output(0, "mca_bmi_sm_component_init: open(%s) failed with errno=%d\n",
            mca_bmi_sm_component.sm_fifo_path, errno);
        return NULL;
    }
    /* dedicated thread blocks on the fifo and drives component progress */
    OBJ_CONSTRUCT(&mca_bmi_sm_component.sm_fifo_thread, ompi_thread_t);
    mca_bmi_sm_component.sm_fifo_thread.t_run = (ompi_thread_fn_t) mca_bmi_sm_component_event_thread;
    ompi_thread_start(&mca_bmi_sm_component.sm_fifo_thread);
#endif

    /* allocate the Shared Memory PTL */
    *num_ptls = 2;
    ptls = malloc((*num_ptls)*sizeof(mca_bmi_base_module_t*));
    if (NULL == ptls) {
        return NULL;
    }

    /* get pointer to the ptls */
    ptls[0] = (mca_bmi_base_module_t *)(&(mca_bmi_sm[0]));
    ptls[1] = (mca_bmi_base_module_t *)(&(mca_bmi_sm[1]));

    /* set scheduling parameters */
    for( i=0 ; i < 2 ; i++ ) {
        mca_bmi_sm[i].super.bmi_first_frag_size=mca_bmi_sm_component.first_fragment_size;
        mca_bmi_sm[i].super.bmi_min_frag_size=mca_bmi_sm_component.max_fragment_size;
        mca_bmi_sm[i].super.bmi_max_frag_size=mca_bmi_sm_component.max_fragment_size;
        mca_bmi_sm[i].super.bmi_exclusivity=100;  /* always use this ptl */
        mca_bmi_sm[i].super.bmi_latency=100;      /* lowest latency */
        mca_bmi_sm[i].super.bmi_bandwidth=900;    /* not really used now since exclusivity is set to 100 */
    }

    /* initialize some PTL data */
    /* start with no SM procs */
    mca_bmi_sm_component.num_smp_procs = 0;
    mca_bmi_sm_component.my_smp_rank = 0xFFFFFFFF;  /* not defined */

    /* set flag indicating ptl not inited */
    mca_bmi_sm[0].bmi_inited=false;
    mca_bmi_sm[1].bmi_inited=false;

    return ptls;
}
/*
* SM component progress.
*/
#if OMPI_ENABLE_PROGRESS_THREADS == 1
/*
 * Body of the fifo event thread (progress-threads builds only): block
 * reading single-byte commands from the named pipe, driving component
 * progress for each one received.  Exits on a DONE command (sent by
 * mca_bmi_sm_component_close) or on a read error.
 */
void mca_bmi_sm_component_event_thread(ompi_object_t* thread)
{
    while(1) {
        unsigned char cmd;
        if(read(mca_bmi_sm_component.sm_fifo_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) {
            /* error condition */
            return;
        }
        if( DONE == cmd ){
            /* return when done message received */
            return;
        }
        mca_bmi_sm_component_progress(0);
    }
}
#endif
/*
 * SM component progress: drain the receive fifos.  Send-side progress
 * is driven by the PML, so only receives are handled here.  For each
 * same-base-address peer fifo, pop one fragment (under the tail lock
 * when threading is enabled) and dispatch by type: ACK fragments fire
 * the sender's completion callback; SEND fragments are delivered to the
 * callback registered for the fragment's tag and then recycled back to
 * the sender as an ACK.
 *
 * NOTE(review): rc doubles as the fifo-write status and the processed
 * count; a successful ACK write resets it to OMPI_SUCCESS (0) before
 * the rc++ at loop bottom, so the returned count may under-report -
 * confirm this is intended.
 *
 * @return number of fragments processed, or an error status from a
 *         fifo write
 */
int mca_bmi_sm_component_progress(void)
{
    /* local variables */
    unsigned int peer_smp_rank ;
    mca_bmi_sm_frag_t *frag;
    ompi_fifo_t *fifo = NULL;
    int my_smp_rank=mca_bmi_sm_component.my_smp_rank;
    int proc;
    int rc = 0;

    /* send progress is made by the PML */

    /*
     * receive progress
     */

    /* poll each fifo */

    /* loop over fifo's - procs with same base shared memory
     * virtual address as this process */
    for( proc=0 ; proc < mca_bmi_sm_component.num_smp_procs_same_base_addr
            ; proc++ )
    {
        peer_smp_rank= mca_bmi_sm_component.list_smp_procs_same_base_addr[proc];
        fifo=&(mca_bmi_sm_component.fifo[peer_smp_rank][my_smp_rank]);

        /* if fifo is not yet setup - continue - no data has been sent */
        if(OMPI_CB_FREE == fifo->tail){
            continue;
        }

        /* acquire thread lock */
        if( ompi_using_threads() ) {
            ompi_atomic_lock( &(fifo->tail_lock) );
        }

        /* get pointer - pass in offset to change queue pointer
         * addressing from that of the sender */
        frag = (mca_bmi_sm_frag_t *)
            ompi_fifo_read_from_tail_same_base_addr( fifo );
        if( OMPI_CB_FREE == frag ) {
            /* fifo is empty - release thread lock and move on */
            if( ompi_using_threads() ) {
                ompi_atomic_unlock(&(fifo->tail_lock));
            }
            continue;
        }

        /* release thread lock */
        if( ompi_using_threads() ) {
            ompi_atomic_unlock(&(fifo->tail_lock));
        }

        /* dispatch fragment by type */
        switch(frag->type) {
            case MCA_BMI_SM_FRAG_ACK:
            {
                /* completion callback - notify the original sender */
                frag->base.des_cbfunc(&mca_bmi_sm[0].super, frag->endpoint, &frag->base, frag->rc);
                break;
            }
            case MCA_BMI_SM_FRAG_SEND:
            {
                /* recv upcall - deliver to the callback registered for
                 * this fragment's tag, then return the fragment to the
                 * sender as an acknowledgment */
                mca_bmi_sm_registration_t* reg = mca_bmi_sm[0].sm_reg + frag->tag;
                reg->cbfunc(&mca_bmi_sm[0].super,frag->tag,&frag->base,reg->cbdata);
                frag->type = MCA_BMI_SM_FRAG_ACK;
                MCA_BMI_SM_FIFO_WRITE(my_smp_rank,peer_smp_rank,frag,rc);
                if(OMPI_SUCCESS != rc)
                    return rc;
                break;
            }
            default:
            {
                /* unknown fragment type - flag an error and bounce the
                 * fragment back to the sender */
                frag->rc = OMPI_ERROR;
                frag->type = MCA_BMI_SM_FRAG_ACK;
                MCA_BMI_SM_FIFO_WRITE(my_smp_rank,peer_smp_rank,frag,rc);
                if(OMPI_SUCCESS != rc)
                    return rc;
                break;
            }
        }
        rc++;
    }  /* end peer_local_smp_rank loop */

#if 0
    /* loop over fifo's - procs with different base shared memory
     * virtual address as this process */
    for( proc=0 ; proc < mca_bmi_sm_component.num_smp_procs_different_base_addr
            ; proc++ )
    {
        peer_smp_rank= mca_bmi_sm_component.list_smp_procs_different_base_addr[proc];
        fifo=&(mca_bmi_sm_component.fifo[peer_smp_rank][my_smp_rank]);

        /* if fifo is not yet setup - continue - no data has been sent */
        if(OMPI_CB_FREE == fifo->tail){
            continue;
        }

        /* acquire thread lock */
        if( ompi_using_threads() ) {
            ompi_atomic_lock(&(fifo->tail_lock));
        }

        /* get pointer - pass in offset to change queue pointer
         * addressing from that of the sender */
        frag=(mca_bmi_sm_frag_t *)ompi_fifo_read_from_tail( fifo,
                mca_bmi_sm_component.sm_offset[peer_smp_rank]);
        if( OMPI_CB_FREE == frag ) {
            /* release thread lock */
            if( ompi_using_threads() ) {
                ompi_atomic_unlock(&(fifo->tail_lock));
            }
            continue;
        }

        /* release thread lock */
        if( ompi_using_threads() ) {
            ompi_atomic_unlock(&(fifo->tail_lock));
        }

        /* change the address from address relative to the shared
         * memory address, to a true virtual address */
        frag = (mca_bmi_sm_frag_t *)( (char *)frag+
                mca_bmi_sm_component.sm_offset[peer_smp_rank]);
        rc++;
    }  /* end peer_local_smp_rank loop */
#endif
    return rc;
}
/*
*
*/
/*
 * Exchange shared-memory setup information (currently just the host
 * name) with all other procs via the modex.  Temporary until the
 * registry is complete.
 *
 * @return OMPI_SUCCESS, OMPI_ERROR on a bad host name, or the status
 *         returned by mca_base_modex_send
 */
static int mca_bmi_sm_component_exchange()
{
    mca_bmi_sm_exchange_t mca_bmi_sm_setup_info;
    size_t len,size;

    /* determine length of host name; an empty name is an error
     * (len is size_t, so "0 >= len" in the old code was just "0 == len") */
    len=strlen(orte_system_info.nodename);
    if( 0 == len) {
        return OMPI_ERROR;
    }
    /* check if string is too long to fit with its NUL terminator */
    if( MCA_BMI_SM_MAX_HOSTNAME_LEN < (len+1) ){
        return OMPI_ERROR;
    }

    /* copy string into the structure that will be exchanged; the length
     * was validated above, so memcpy + explicit termination suffices.
     * (The old strncpy NULL-check was dead code - strncpy never
     * returns NULL.) */
    memcpy(mca_bmi_sm_setup_info.host_name,
           orte_system_info.nodename, len);
    mca_bmi_sm_setup_info.host_name[len]='\0';

    /* exchange setup information - BUG FIX: propagate any modex error
     * to the caller instead of discarding it (the old code assigned rc
     * and then unconditionally returned OMPI_SUCCESS) */
    size=sizeof(mca_bmi_sm_exchange_t);
    return mca_base_modex_send(&mca_bmi_sm_component.super.bmi_version,
                               &mca_bmi_sm_setup_info, size);
}

43
src/mca/bmi/sm/bmi_sm_endpoint.h Обычный файл
Просмотреть файл

@ -0,0 +1,43 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
 * @file
 */
#ifndef MCA_BMI_SM_ENDPOINT_H
#define MCA_BMI_SM_ENDPOINT_H

#if OMPI_ENABLE_PROGRESS_THREADS == 1
#include "event/event.h"
#endif

/**
 * An abstraction that represents a connection to an endpoint process.
 * An instance of mca_bmi_base_endpoint_t is associated w/ each process
 * and BMI pair at startup.
 */
struct mca_bmi_base_endpoint_t {
    int my_smp_rank;    /**< My SMP process rank.  Used for accessing
                         *   SMP specific data structures. */
    int peer_smp_rank;  /**< My peer's SMP process rank.  Used for accessing
                         *   SMP specific data structures. */
#if OMPI_ENABLE_PROGRESS_THREADS == 1
    int fifo_fd;        /**< pipe/fifo used to signal endpoint that data is queued */
#endif
};

#endif

45
src/mca/bmi/sm/bmi_sm_fifo.h Обычный файл
Просмотреть файл

@ -0,0 +1,45 @@
#ifndef MCA_BMI_SM_FIFO_H
#define MCA_BMI_SM_FIFO_H
#include "bmi_sm.h"
/*
 * Post a fragment to the head of the fifo leading from my_smp_rank to
 * peer_smp_rank, lazily allocating the circular-buffer fifo on first
 * use.  'rc' receives OMPI_SUCCESS or an error status.
 *
 * NOTE(review): MCA_BMI_SM_SIGNAL_PEER is expanded with the free
 * variable 'bmi_peer', which is not a macro parameter - in
 * progress-threads builds every expansion site must have a variable of
 * that name in scope.  Confirm all callers provide it.
 */
#define MCA_BMI_SM_FIFO_WRITE(my_smp_rank,peer_smp_rank,frag,rc) \
do { \
    ompi_fifo_t* fifo; \
    fifo=&(mca_bmi_sm_component.fifo[my_smp_rank][peer_smp_rank]); \
    \
    /* thread lock */ \
    if(ompi_using_threads()) \
        ompi_atomic_lock(&fifo->head_lock); \
    if(OMPI_CB_FREE == fifo->head) { \
        /* no queues have been allocated - allocate now */ \
        rc=ompi_fifo_init_same_base_addr( \
            mca_bmi_sm_component.size_of_cb_queue, \
            mca_bmi_sm_component.cb_lazy_free_freq, \
            /* at this stage we are not doing anything with memory \
             * locality */ \
            0,0,0, \
            fifo, mca_bmi_sm_component.sm_mpool); \
        if( rc != OMPI_SUCCESS ) { \
            if(ompi_using_threads()) \
                ompi_atomic_unlock(&(fifo->head_lock)); \
            break; \
        } \
    } \
    \
    /* post fragment */ \
    rc=ompi_fifo_write_to_head_same_base_addr(frag, fifo, \
        mca_bmi_sm_component.sm_mpool); \
    if( 0 <= rc ) { \
        MCA_BMI_SM_SIGNAL_PEER(bmi_peer); \
        rc=OMPI_SUCCESS; \
    } \
    if(ompi_using_threads()) \
        ompi_atomic_unlock(&fifo->head_lock); \
} while(0)
#endif

49
src/mca/bmi/sm/bmi_sm_frag.c Обычный файл
Просмотреть файл

@ -0,0 +1,49 @@
#include "bmi_sm_frag.h"

/*
 * Shared constructor body: point the single data segment at the memory
 * immediately following the fragment structure and initialize the
 * descriptor's source segment list.  Reads frag->size, which must be
 * set before this runs (the frag1/frag2 constructors do so).
 */
static inline void mca_bmi_sm_frag_constructor(mca_bmi_sm_frag_t* frag)
{
    frag->segment.seg_addr.pval = frag+1;  /* payload lives just past the struct */
    frag->segment.seg_len = frag->size;
    frag->base.des_src = &frag->segment;
    frag->base.des_src_cnt = 1;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_flags = 0;
}

/* constructor for eager (first) fragments - sized by first_fragment_size */
static void mca_bmi_sm_frag1_constructor(mca_bmi_sm_frag_t* frag)
{
    frag->size = mca_bmi_sm_component.first_fragment_size;
    mca_bmi_sm_frag_constructor(frag);
}

/* constructor for max-size fragments - sized by max_fragment_size */
static void mca_bmi_sm_frag2_constructor(mca_bmi_sm_frag_t* frag)
{
    frag->size = mca_bmi_sm_component.max_fragment_size;
    mca_bmi_sm_frag_constructor(frag);
}

/* nothing to tear down - the payload is inline with the fragment */
static void mca_bmi_sm_frag_destructor(mca_bmi_sm_frag_t* frag)
{
}

/* NOTE(review): this class instance runs the generic constructor with
 * frag->size uninitialized - confirm mca_bmi_sm_frag_t is never
 * instantiated directly (only via the frag1/frag2 classes below). */
OBJ_CLASS_INSTANCE(
    mca_bmi_sm_frag_t,
    mca_bmi_base_descriptor_t,
    mca_bmi_sm_frag_constructor,
    mca_bmi_sm_frag_destructor);

OBJ_CLASS_INSTANCE(
    mca_bmi_sm_frag1_t,
    mca_bmi_base_descriptor_t,
    mca_bmi_sm_frag1_constructor,
    mca_bmi_sm_frag_destructor);

OBJ_CLASS_INSTANCE(
    mca_bmi_sm_frag2_t,
    mca_bmi_base_descriptor_t,
    mca_bmi_sm_frag2_constructor,
    mca_bmi_sm_frag_destructor);

82
src/mca/bmi/sm/bmi_sm_frag.h Обычный файл
Просмотреть файл

@ -0,0 +1,82 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_BMI_SM_SEND_FRAG_H
#define MCA_BMI_SM_SEND_FRAG_H
#include <sys/types.h>
#include "ompi_config.h"
#include "class/ompi_free_list.h"
#include "bmi_sm.h"
typedef enum {
MCA_BMI_SM_FRAG_SEND,
MCA_BMI_SM_FRAG_PUT,
MCA_BMI_SM_FRAG_GET,
MCA_BMI_SM_FRAG_ACK
} mca_bmi_sm_frag_type_t;
/**
* shared memory send fragment derived type.
*/
struct mca_bmi_sm_frag_t {
mca_bmi_base_descriptor_t base;
mca_bmi_base_segment_t segment;
struct mca_bmi_base_endpoint_t *endpoint;
mca_bmi_sm_frag_type_t type;
mca_bmi_base_tag_t tag;
size_t size;
int rc;
};
typedef struct mca_bmi_sm_frag_t mca_bmi_sm_frag_t;
typedef struct mca_bmi_sm_frag_t mca_bmi_sm_frag1_t;
typedef struct mca_bmi_sm_frag_t mca_bmi_sm_frag2_t;
OBJ_CLASS_DECLARATION(mca_bmi_sm_frag_t);
OBJ_CLASS_DECLARATION(mca_bmi_sm_frag1_t);
OBJ_CLASS_DECLARATION(mca_bmi_sm_frag2_t);
/* Allocate an eager (first) fragment from the free list; blocks until
 * one is available.  'rc' receives the free-list status. */
#define MCA_BMI_SM_FRAG_ALLOC1(frag, rc) \
{ \
    ompi_list_item_t* item; \
    OMPI_FREE_LIST_WAIT(&mca_bmi_sm_component.sm_frags1, item, rc); \
    frag = (mca_bmi_sm_frag_t*)item; \
}

/* Allocate a max-size fragment from the free list; blocks until one is
 * available.  'rc' receives the free-list status. */
#define MCA_BMI_SM_FRAG_ALLOC2(frag, rc) \
{ \
    ompi_list_item_t* item; \
    OMPI_FREE_LIST_WAIT(&mca_bmi_sm_component.sm_frags2, item, rc); \
    frag = (mca_bmi_sm_frag_t*)item; \
}

/* Return a fragment to its free list.  BUG FIX: the old expansion used
 * &frag->super, but mca_bmi_sm_frag_t has no 'super' member (its first
 * member is 'base'), so any expansion would fail to compile.  Cast the
 * fragment back to the list-item type handed out by the ALLOC macros. */
#define MCA_BMI_SM_FRAG_RETURN1(frag) \
{ \
    OMPI_FREE_LIST_RETURN(&mca_bmi_sm_component.sm_frags1, (ompi_list_item_t*)(frag)); \
}

#define MCA_BMI_SM_FRAG_RETURN2(frag) \
{ \
    OMPI_FREE_LIST_RETURN(&mca_bmi_sm_component.sm_frags2, (ompi_list_item_t*)(frag)); \
}
#endif

22
src/mca/bmi/sm/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,22 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
PARAM_INIT_FILE=bmi_sm.c
PARAM_CONFIG_HEADER_FILE="sm_config.h"
PARAM_CONFIG_FILES="Makefile"

Просмотреть файл

@ -146,10 +146,23 @@ static int orte_pls_fork_proc(
char* uri;
char **new_env, **environ_copy;
#if 0
/* for gperf - setup a new directory for each executable */
char path[PATH_MAX];
/* set working directory */
sprintf(path, "%s/%d", context->cwd, getpid());
if(mkdir(path,0777) != 0) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
}
if(chdir(path) != 0) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
}
#else
if(chdir(context->cwd) != 0) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
}
#endif
/* setup base environment */
environ_copy = ompi_argv_copy(environ);

Просмотреть файл

@ -35,6 +35,7 @@ static size_t mca_pml_bsend_size; /* size of users buffer */
static size_t mca_pml_bsend_count; /* number of outstanding requests */
static size_t mca_pml_bsend_pagesz; /* mmap page size */
static int mca_pml_bsend_pagebits; /* number of bits in pagesz */
static int32_t mca_pml_bsend_init = 0;
@ -62,10 +63,14 @@ static void* mca_pml_bsend_alloc_segment(size_t* size_inout)
*/
int mca_pml_base_bsend_init(bool thread_safe)
{
static int initialized;
int id = mca_base_param_register_string("pml", "base", "bsend_allocator", NULL, "basic");
char *name;
size_t tmp;
if(OMPI_THREAD_ADD32(&mca_pml_bsend_init, 1) > 0)
return OMPI_SUCCESS;
/* initialize static objects */
OBJ_CONSTRUCT(&mca_pml_bsend_mutex, ompi_mutex_t);
OBJ_CONSTRUCT(&mca_pml_bsend_condition, ompi_condition_t);
@ -94,6 +99,9 @@ int mca_pml_base_bsend_init(bool thread_safe)
*/
int mca_pml_base_bsend_fini()
{
if(OMPI_THREAD_ADD32(&mca_pml_bsend_init,-1) > 0)
return OMPI_SUCCESS;
if(NULL != mca_pml_bsend_allocator)
mca_pml_bsend_allocator->alc_finalize(mca_pml_bsend_allocator);
mca_pml_bsend_allocator = NULL;

Просмотреть файл

@ -45,7 +45,6 @@ int mca_pml_base_output = -1;
OMPI_DECLSPEC mca_pml_base_module_t mca_pml = {
NULL, /* pml_add_procs */
NULL, /* pml_del_procs */
NULL, /* pml_add_ptls */
NULL, /* pml_control */
mca_pml_base_progress, /* pml_progress */
NULL, /* pml_add_comm */

Просмотреть файл

@ -23,7 +23,7 @@
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
OMPI_DECLSPEC extern ompi_class_t mca_pml_base_recv_request_t_class;
/**
* Base type for receive requests.
*/
@ -33,6 +33,7 @@ struct mca_pml_base_recv_request_t {
};
typedef struct mca_pml_base_recv_request_t mca_pml_base_recv_request_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_base_recv_request_t);
/**
* Initialize a receive request with call parameters.
@ -81,10 +82,11 @@ typedef struct mca_pml_base_recv_request_t mca_pml_base_recv_request_t;
*
* @param request (IN) Receive request.
*/
#define MCA_PML_BASE_RECV_REQUEST_RETURN( request ) \
do { \
OBJ_RELEASE( (request)->req_base.req_comm); \
OBJ_RELEASE( (request)->req_base.req_datatype ); \
#define MCA_PML_BASE_RECV_REQUEST_FINI( request ) \
do { \
OMPI_REQUEST_FINI(&(request)->req_base.req_ompi); \
OBJ_RELEASE( (request)->req_base.req_comm); \
OBJ_RELEASE( (request)->req_base.req_datatype ); \
} while (0)
#if defined(c_plusplus) || defined(__cplusplus)

Просмотреть файл

@ -22,7 +22,6 @@
#include "class/ompi_free_list.h"
#include "request/request.h"
#include "communicator/communicator.h"
#include "mca/ptl/ptl.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
@ -52,7 +51,7 @@ struct mca_pml_base_request_t {
int32_t req_tag; /**< user defined tag */
ompi_communicator_t *req_comm; /**< communicator pointer */
ompi_proc_t* req_proc; /**< peer process */
mca_ptl_sequence_t req_sequence; /**< sequence number for MPI pt-2-pt ordering */
uint64_t req_sequence; /**< sequence number for MPI pt-2-pt ordering */
struct ompi_datatype_t *req_datatype; /**< pointer to data type */
mca_pml_base_request_type_t req_type; /**< MPI request type - used for test */
bool req_persistent; /**< flag indicating if the this is a persistent request */

Просмотреть файл

@ -73,46 +73,46 @@ typedef struct mca_pml_base_send_request_t mca_pml_base_send_request_t;
mode, \
persistent) \
{ \
/* increment reference count on communicator */ \
/* increment reference counts */ \
OBJ_RETAIN(comm); \
OBJ_RETAIN(datatype); \
\
OMPI_REQUEST_INIT(&(request)->req_base.req_ompi); \
request->req_addr = addr; \
request->req_count = count; \
request->req_datatype = datatype; \
request->req_send_mode = mode; \
request->req_base.req_addr = addr; \
request->req_base.req_count = count; \
request->req_base.req_datatype = datatype; \
request->req_base.req_peer = (int32_t)peer; \
request->req_base.req_tag = (int32_t)tag; \
request->req_base.req_comm = comm; \
request->req_base.req_proc = ompi_comm_peer_lookup(comm,peer); \
request->req_base.req_persistent = persistent; \
request->req_base.req_pml_complete = (persistent ? true : false); \
request->req_base.req_free_called = false; \
request->req_base.req_ompi.req_status._cancelled = 0; \
(request)->req_addr = addr; \
(request)->req_count = count; \
(request)->req_datatype = datatype; \
(request)->req_send_mode = mode; \
(request)->req_base.req_addr = addr; \
(request)->req_base.req_count = count; \
(request)->req_base.req_datatype = datatype; \
(request)->req_base.req_peer = (int32_t)peer; \
(request)->req_base.req_tag = (int32_t)tag; \
(request)->req_base.req_comm = comm; \
(request)->req_base.req_proc = ompi_comm_peer_lookup(comm,peer); \
(request)->req_base.req_persistent = persistent; \
(request)->req_base.req_pml_complete = (persistent ? true : false); \
(request)->req_base.req_free_called = false; \
(request)->req_base.req_ompi.req_status._cancelled = 0; \
\
/* initialize datatype convertor for this request */ \
if(count > 0) { \
ompi_convertor_copy( request->req_base.req_proc->proc_convertor, \
&request->req_convertor); \
ompi_convertor_copy((request)->req_base.req_proc->proc_convertor,\
&(request)->req_convertor); \
/* We will create a convertor specialized for send */ \
/* just to be able to get the packed size. This size */ \
/* depend on the conversion used sender side or receiver */ \
/* size. BEWARE this convertor is not suitable for the */ \
/* sending operation !! */ \
ompi_convertor_init_for_send( &request->req_convertor, \
ompi_convertor_init_for_send( &(request)->req_convertor, \
0, \
request->req_base.req_datatype, \
request->req_base.req_count, \
request->req_base.req_addr, \
(request)->req_base.req_datatype, \
(request)->req_base.req_count, \
(request)->req_base.req_addr, \
0, NULL ); \
ompi_convertor_get_packed_size( &request->req_convertor, \
(uint32_t*)&(request->req_bytes_packed) ); \
ompi_convertor_get_packed_size( &(request)->req_convertor, \
(uint32_t*)&((request)->req_bytes_packed) );\
} else { \
request->req_bytes_packed = 0; \
(request)->req_bytes_packed = 0; \
} \
}
@ -122,8 +122,9 @@ typedef struct mca_pml_base_send_request_t mca_pml_base_send_request_t;
* @param request (IN) The send request.
*/
#define MCA_PML_BASE_SEND_REQUEST_RETURN( request ) \
#define MCA_PML_BASE_SEND_REQUEST_FINI( request ) \
do { \
OMPI_REQUEST_FINI(&(request)->req_base.req_ompi); \
OBJ_RELEASE((request)->req_base.req_comm); \
OBJ_RELEASE((request)->req_base.req_datatype); \
} while (0)

0
src/mca/pml/ob1/.ompi_ignore Обычный файл
Просмотреть файл

2
src/mca/pml/ob1/.ompi_unignore Обычный файл
Просмотреть файл

@ -0,0 +1,2 @@
twoodall
gshipman

41
src/mca/pml/ob1/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,41 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
include $(top_ompi_srcdir)/config/Makefile.options
noinst_LTLIBRARIES = libmca_pml_ob1.la
libmca_pml_ob1_la_SOURCES = \
pml_ob1.c \
pml_ob1.h \
pml_ob1_comm.c \
pml_ob1_component.c \
pml_ob1_component.h \
pml_ob1_endpoint.c \
pml_ob1_endpoint.h \
pml_ob1_iprobe.c \
pml_ob1_irecv.c \
pml_ob1_isend.c \
pml_ob1_match.c \
pml_ob1_proc.c \
pml_ob1_proc.h \
pml_ob1_progress.c \
pml_ob1_recvfrag.c \
pml_ob1_recvfrag.h \
pml_ob1_recvreq.c \
pml_ob1_recvreq.h \
pml_ob1_sendreq.c \
pml_ob1_sendreq.h \
pml_ob1_start.c

22
src/mca/pml/ob1/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,22 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
PARAM_INIT_FILE=pml_ob1.c
PARAM_CONFIG_HEADER_FILE="ob1_config.h"
PARAM_CONFIG_FILES="Makefile"

405
src/mca/pml/ob1/pml_ob1.c Обычный файл
Просмотреть файл

@ -0,0 +1,405 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdlib.h>
#include <string.h>
#include "class/ompi_bitmap.h"
#include "mca/pml/pml.h"
#include "mca/bmi/bmi.h"
#include "mca/bmi/base/base.h"
#include "pml_ob1.h"
#include "pml_ob1_component.h"
#include "pml_ob1_comm.h"
#include "pml_ob1_proc.h"
#include "pml_ob1_hdr.h"
/*
 * The ob1 PML module instance: table of entry points invoked by the
 * MPI layer.  Initializer order must match mca_pml_base_module_t.
 */
mca_pml_ob1_t mca_pml_ob1 = {
    {
        mca_pml_ob1_add_procs,   /* pml_add_procs */
        mca_pml_ob1_del_procs,   /* pml_del_procs */
        mca_pml_ob1_enable,      /* pml_enable */
        mca_pml_ob1_progress,    /* pml_progress */
        mca_pml_ob1_add_comm,    /* pml_add_comm */
        mca_pml_ob1_del_comm,    /* pml_del_comm */
        mca_pml_ob1_irecv_init,  /* persistent recv setup */
        mca_pml_ob1_irecv,
        mca_pml_ob1_recv,
        mca_pml_ob1_isend_init,  /* persistent send setup */
        mca_pml_ob1_isend,
        mca_pml_ob1_send,
        mca_pml_ob1_iprobe,
        mca_pml_ob1_probe,
        mca_pml_ob1_start        /* start persistent requests */
    }
};
/*
 * Enable/disable the ob1 PML.  There is no per-enable state to manage,
 * so the request always succeeds.
 *
 * @param enable (IN) requested state (ignored)
 * @return OMPI_SUCCESS
 */
int mca_pml_ob1_enable(bool enable)
{
    (void)enable;  /* nothing to set up or tear down */
    return OMPI_SUCCESS;
}
/*
 * Attach ob1-specific state to a newly created communicator: allocate
 * the pml comm structure and cache per-peer proc pointers.
 *
 * @param comm (IN/OUT) communicator being created
 * @return OMPI_SUCCESS or OMPI_ERR_OUT_OF_RESOURCE
 */
int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
{
    /* allocate pml specific comm data */
    mca_pml_ob1_comm_t* pml_comm = OBJ_NEW(mca_pml_ob1_comm_t);
    int i;

    if (NULL == pml_comm) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    mca_pml_ob1_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count);
    comm->c_pml_comm = pml_comm;

    /* BUG FIX: c_pml_procs is an array of pointers, so size the
     * allocation by the pointer type (the old code multiplied by
     * sizeof(mca_pml_ob1_proc_t), the full struct size) */
    comm->c_pml_procs = (mca_pml_ob1_proc_t**)malloc(
        comm->c_remote_group->grp_proc_count * sizeof(mca_pml_ob1_proc_t*));
    if(NULL == comm->c_pml_procs) {
        /* undo the partial setup so the communicator is left clean */
        OBJ_RELEASE(pml_comm);
        comm->c_pml_comm = NULL;
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    for(i=0; i<comm->c_remote_group->grp_proc_count; i++)
        comm->c_pml_procs[i] = comm->c_remote_group->grp_proc_pointers[i]->proc_pml;
    return OMPI_SUCCESS;
}
/*
 * Release the ob1-specific state attached to a communicator: drop the
 * pml comm object and free the cached per-peer proc array.
 *
 * @param comm (IN/OUT) communicator being destroyed
 * @return OMPI_SUCCESS
 */
int mca_pml_ob1_del_comm(ompi_communicator_t* comm)
{
    /* drop the pml comm state */
    OBJ_RELEASE(comm->c_pml_comm);
    comm->c_pml_comm = NULL;

    /* release the proc cache, if one was built */
    if(NULL != comm->c_pml_procs) {
        free(comm->c_pml_procs);
    }
    comm->c_pml_procs = NULL;
    return OMPI_SUCCESS;
}
/*
 * qsort comparator: orders BMI modules by descending exclusivity so
 * the most exclusive module sorts first.  Arguments are pointers into
 * an array of mca_bmi_base_module_t*.
 */
static int bmi_exclusivity_compare(const void* arg1, const void* arg2)
{
    mca_bmi_base_module_t* lhs = *(struct mca_bmi_base_module_t**)arg1;
    mca_bmi_base_module_t* rhs = *(struct mca_bmi_base_module_t**)arg2;

    if (lhs->bmi_exclusivity < rhs->bmi_exclusivity) {
        return 1;   /* lower exclusivity sorts later */
    }
    if (lhs->bmi_exclusivity > rhs->bmi_exclusivity) {
        return -1;  /* higher exclusivity sorts first */
    }
    return 0;
}
/*
 * Build the PML's view of the selected BMI modules: record each module,
 * deduplicate their owning components, register the PML receive
 * callback with every module, and sort the module array by descending
 * exclusivity.
 *
 * NOTE(review): on the error paths (OOM or a failed bmi_register) the
 * arrays already stored in mca_pml_ob1 are not freed here - confirm a
 * caller performs that cleanup.
 *
 * @return OMPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE, or the status from
 *         a failed bmi_register call
 */
int mca_pml_ob1_add_bmis()
{
    /* build an array of ob1s and ob1 modules */
    ompi_list_t* bmis = &mca_bmi_base_modules_initialized;
    mca_bmi_base_selected_module_t* selected_bmi;
    size_t num_bmis = ompi_list_get_size(bmis);

    mca_pml_ob1.num_bmi_modules = 0;
    mca_pml_ob1.num_bmi_progress = 0;
    mca_pml_ob1.num_bmi_components = 0;
    /* each array is sized for the worst case: one entry per module */
    mca_pml_ob1.bmi_modules = (mca_bmi_base_module_t **)malloc(sizeof(mca_bmi_base_module_t*) * num_bmis);
    mca_pml_ob1.bmi_progress = (mca_bmi_base_component_progress_fn_t*)malloc(sizeof(mca_bmi_base_component_progress_fn_t) * num_bmis);
    mca_pml_ob1.bmi_components = (mca_bmi_base_component_t **)malloc(sizeof(mca_bmi_base_component_t*) * num_bmis);
    if (NULL == mca_pml_ob1.bmi_modules ||
        NULL == mca_pml_ob1.bmi_progress ||
        NULL == mca_pml_ob1.bmi_components) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    for(selected_bmi = (mca_bmi_base_selected_module_t*)ompi_list_get_first(bmis);
        selected_bmi != (mca_bmi_base_selected_module_t*)ompi_list_get_end(bmis);
        selected_bmi = (mca_bmi_base_selected_module_t*)ompi_list_get_next(selected_bmi)) {
        mca_bmi_base_module_t *bmi = selected_bmi->bmi_module;
        size_t i;
        int rc;
        mca_pml_ob1.bmi_modules[mca_pml_ob1.num_bmi_modules++] = bmi;
        /* scan for this module's component among those already seen;
         * i == num_bmi_components afterwards means it is new */
        for(i=0; i<mca_pml_ob1.num_bmi_components; i++) {
            if(mca_pml_ob1.bmi_components[i] == bmi->bmi_component) {
                break;
            }
        }

        /* setup callback for receive */
        rc = bmi->bmi_register(bmi, MCA_BMI_TAG_PML, mca_pml_ob1_recv_callback, NULL);
        if(OMPI_SUCCESS != rc)
            return rc;

        if(i == mca_pml_ob1.num_bmi_components) {
            mca_pml_ob1.bmi_components[mca_pml_ob1.num_bmi_components++] = bmi->bmi_component;
        }
    }

    /* sort ob1 list by exclusivity */
    qsort(mca_pml_ob1.bmi_modules,
          mca_pml_ob1.num_bmi_modules,
          sizeof(struct mca_bmi_base_module_t*),
          bmi_exclusivity_compare);
    return OMPI_SUCCESS;
}
/*
 * For each proc, set up a data structure that indicates the BMIs
 * that can be used to reach the destination.
 *
 */
/**
 * Wire up a set of newly-added processes: exchange architecture info
 * via the modex, let each BMI report which procs it can reach, cache
 * per-proc endpoint lists on proc->proc_pml, and compute the
 * bandwidth/latency scheduling metrics.
 *
 * @param procs   Array of processes being added.
 * @param nprocs  Number of entries in procs.
 * @return        OMPI_SUCCESS or an OMPI error code.
 *
 * Fixes vs. original: the bmi_endpoints allocation is now checked, and
 * the memset used a typo'd type name (mca_ob1_base_endpoint_t).
 * NOTE(review): 'reachable' is OBJ_CONSTRUCTed but never OBJ_DESTRUCTed
 * on any path — possible leak of the bitmap's storage; confirm against
 * ompi_bitmap_t semantics.
 */
int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
{
    size_t p;
    ompi_bitmap_t reachable;
    struct mca_bmi_base_endpoint_t** bmi_endpoints = NULL;
    int rc;
    size_t p_index;

    if(nprocs == 0)
        return OMPI_SUCCESS;

    OBJ_CONSTRUCT(&reachable, ompi_bitmap_t);
    rc = ompi_bitmap_init(&reachable, nprocs);
    if(OMPI_SUCCESS != rc)
        return rc;

    /* iterate through each of the procs and set the peers architecture */
    for(p=0; p<nprocs; p++) {
        uint32_t* proc_arch;
        size_t size = sizeof(uint32_t);
        rc = mca_base_modex_recv(&mca_pml_ob1_component.pmlm_version, procs[p],
            (void**)&proc_arch, &size);
        if(rc != OMPI_SUCCESS)
            return rc;
        if(size != sizeof(uint32_t))
            return OMPI_ERROR;
        procs[p]->proc_arch = ntohl(*proc_arch);
        free(proc_arch);
    }

    /* attempt to add all procs to each bmi */
    bmi_endpoints = (struct mca_bmi_base_endpoint_t **)malloc(nprocs * sizeof(struct mca_bmi_base_endpoint_t*));
    if(NULL == bmi_endpoints) {
        /* FIX: this allocation was previously unchecked */
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    for(p_index = 0; p_index < mca_pml_ob1.num_bmi_modules; p_index++) {
        mca_bmi_base_module_t* bmi = mca_pml_ob1.bmi_modules[p_index];
        int bmi_inuse = 0;

        /* if the bmi can reach the destination proc it sets the
         * corresponding bit (proc index) in the reachable bitmap
         * and can return addressing information for each proc
         * that is passed back to the bmi on data transfer calls
         */
        ompi_bitmap_clear_all_bits(&reachable);
        /* FIX: sizeof used a typo'd struct tag (mca_ob1_base_endpoint_t) */
        memset(bmi_endpoints, 0, nprocs * sizeof(struct mca_bmi_base_endpoint_t*));
        rc = bmi->bmi_add_procs(bmi, nprocs, procs, bmi_endpoints, &reachable);
        if(OMPI_SUCCESS != rc) {
            free(bmi_endpoints);
            return rc;
        }

        /* for each proc that is reachable - add the bmi to the procs array(s) */
        for(p=0; p<nprocs; p++) {
            if(ompi_bitmap_is_set_bit(&reachable, p)) {
                ompi_proc_t *proc = procs[p];
                mca_pml_ob1_proc_t* proc_pml = proc->proc_pml;
                mca_pml_ob1_endpoint_t* endpoint;
                size_t size;

                /* this bmi can be used */
                bmi_inuse++;

                /* initialize each proc */
                if(NULL == proc_pml) {
                    /* allocate pml specific proc data */
                    proc_pml = OBJ_NEW(mca_pml_ob1_proc_t);
                    if (NULL == proc_pml) {
                        ompi_output(0, "mca_pml_ob1_add_procs: unable to allocate resources");
                        free(bmi_endpoints);
                        return OMPI_ERR_OUT_OF_RESOURCE;
                    }
                    /* preallocate space in array for max number of bmis */
                    mca_pml_ob1_ep_array_reserve(&proc_pml->bmi_first, mca_pml_ob1.num_bmi_modules);
                    mca_pml_ob1_ep_array_reserve(&proc_pml->bmi_next, mca_pml_ob1.num_bmi_modules);
                    proc_pml->proc_ompi = proc;
                    proc->proc_pml = proc_pml;
                }

                /* dont allow an additional BMI with a lower exclusivity ranking */
                size = mca_pml_ob1_ep_array_get_size(&proc_pml->bmi_next);
                if(size > 0) {
                    endpoint = mca_pml_ob1_ep_array_get_index(&proc_pml->bmi_next, size-1);
                    /* skip this bmi if the exclusivity is less than the previous */
                    if(endpoint->bmi->bmi_exclusivity > bmi->bmi_exclusivity) {
                        if(bmi_endpoints[p] != NULL) {
                            bmi->bmi_del_procs(bmi, 1, &proc, &bmi_endpoints[p]);
                        }
                        continue;
                    }
                }

                /* cache the bmi on the proc */
                endpoint = mca_pml_ob1_ep_array_insert(&proc_pml->bmi_next);
                endpoint->bmi = bmi;
                endpoint->bmi_cache = NULL;
                endpoint->bmi_endpoint = bmi_endpoints[p];
                endpoint->bmi_weight = 0;
                endpoint->bmi_alloc = bmi->bmi_alloc;
                endpoint->bmi_free = bmi->bmi_free;
                endpoint->bmi_send = bmi->bmi_send;
                endpoint->bmi_put = bmi->bmi_put;
                endpoint->bmi_get = bmi->bmi_get;
            }
        }
        /* register this component's progress function once, if it is in use */
        if(bmi_inuse > 0 && NULL != bmi->bmi_component->bmi_progress) {
            size_t p;
            bool found = false;
            for(p=0; p<mca_pml_ob1.num_bmi_progress; p++) {
                if(mca_pml_ob1.bmi_progress[p] == bmi->bmi_component->bmi_progress) {
                    found = true;
                    break;
                }
            }
            if(found == false) {
                mca_pml_ob1.bmi_progress[mca_pml_ob1.num_bmi_progress] =
                    bmi->bmi_component->bmi_progress;
                mca_pml_ob1.num_bmi_progress++;
            }
        }
    }
    free(bmi_endpoints);

    /* iterate back through procs and compute metrics for registered bmis */
    for(p=0; p<nprocs; p++) {
        ompi_proc_t *proc = procs[p];
        mca_pml_ob1_proc_t* proc_pml = proc->proc_pml;
        double total_bandwidth = 0;
        uint32_t latency = 0;
        size_t n_index;
        size_t n_size;

        /* skip over procs w/ no bmis registered */
        if(NULL == proc_pml)
            continue;

        /* (1) determine the total bandwidth available across all bmis
         *     note that we need to do this here, as we may already have bmis configured
         * (2) determine the highest priority ranking for latency
         */
        n_size = mca_pml_ob1_ep_array_get_size(&proc_pml->bmi_next);
        for(n_index = 0; n_index < n_size; n_index++) {
            mca_pml_ob1_endpoint_t* endpoint =
                mca_pml_ob1_ep_array_get_index(&proc_pml->bmi_next, n_index);
            mca_bmi_base_module_t* bmi = endpoint->bmi;
            total_bandwidth += endpoint->bmi->bmi_bandwidth;
            if(bmi->bmi_latency > latency)
                latency = bmi->bmi_latency;
        }

        /* (1) set the weight of each bmi as a percentage of overall bandwidth
         * (2) copy all bmi instances at the highest priority ranking into the
         *     list of bmis used for first fragments
         */
        for(n_index = 0; n_index < n_size; n_index++) {
            mca_pml_ob1_endpoint_t* endpoint =
                mca_pml_ob1_ep_array_get_index(&proc_pml->bmi_next, n_index);
            mca_bmi_base_module_t *bmi = endpoint->bmi;
            double weight;

            /* compute weighting factor for this bmi */
            if(bmi->bmi_bandwidth)
                weight = endpoint->bmi->bmi_bandwidth / total_bandwidth;
            else
                weight = 1.0 / n_size;
            endpoint->bmi_weight = (int)(weight * 100);

            /* check to see if this bmi is already in the array of bmis
             * used for first fragments - if not add it.
             */
            if(bmi->bmi_latency == latency) {
                mca_pml_ob1_endpoint_t* ep_new =
                    mca_pml_ob1_ep_array_insert(&proc_pml->bmi_first);
                *ep_new = *endpoint;
            }
        }
    }
    return OMPI_SUCCESS;
}
/*
 * Iterate through each proc and notify every BMI associated
 * with the proc that the proc is going away.
 */
/**
 * Notify every BMI endpoint cached for each departing proc, then
 * release the proc's PML state.
 *
 * @param procs   Array of processes being removed.
 * @param nprocs  Number of entries in procs.
 * @return        OMPI_SUCCESS or the first error from bmi_del_procs().
 *
 * Fixes vs. original: two places used the bmi_first array where the
 * bmi_next array was clearly intended (the size for the scan of
 * bmi_next, and the element lookup in the bmi_next pass), and a proc
 * with no PML state attached would have been dereferenced.
 */
int mca_pml_ob1_del_procs(ompi_proc_t** procs, size_t nprocs)
{
    size_t p;
    int rc;
    for(p = 0; p < nprocs; p++) {
        ompi_proc_t *proc = procs[p];
        mca_pml_ob1_proc_t* proc_pml = proc->proc_pml;
        size_t f_index, f_size;
        size_t n_index, n_size;

        /* FIX: skip procs that never had PML state attached */
        if(NULL == proc_pml)
            continue;

        /* notify each bmi that the proc is going away */
        f_size = mca_pml_ob1_ep_array_get_size(&proc_pml->bmi_first);
        for(f_index = 0; f_index < f_size; f_index++) {
            mca_pml_ob1_endpoint_t* endpoint = mca_pml_ob1_ep_array_get_index(&proc_pml->bmi_first, f_index);
            mca_bmi_base_module_t* bmi = endpoint->bmi;
            rc = bmi->bmi_del_procs(bmi,1,&proc,&endpoint->bmi_endpoint);
            if(OMPI_SUCCESS != rc) {
                return rc;
            }

            /* remove this from next array so that we dont call it twice w/
             * the same address pointer
             */
            /* FIX: size and lookup must both come from bmi_next (was bmi_first) */
            n_size = mca_pml_ob1_ep_array_get_size(&proc_pml->bmi_next);
            for(n_index = 0; n_index < n_size; n_index++) {
                mca_pml_ob1_endpoint_t* next_ep = mca_pml_ob1_ep_array_get_index(&proc_pml->bmi_next, n_index);
                if(next_ep->bmi == bmi) {
                    memset(next_ep, 0, sizeof(mca_pml_ob1_endpoint_t));
                    break;
                }
            }
        }

        /* notify each bmi that was not in the array of bmis for first fragments */
        n_size = mca_pml_ob1_ep_array_get_size(&proc_pml->bmi_next);
        for(n_index = 0; n_index < n_size; n_index++) {
            /* FIX: iterate bmi_next (was bmi_first); zeroed entries were
             * already notified in the first pass */
            mca_pml_ob1_endpoint_t* endpoint = mca_pml_ob1_ep_array_get_index(&proc_pml->bmi_next, n_index);
            mca_bmi_base_module_t* bmi = endpoint->bmi;
            if (bmi != 0) {
                rc = bmi->bmi_del_procs(bmi,1,&proc,&endpoint->bmi_endpoint);
                if(OMPI_SUCCESS != rc)
                    return rc;
            }
        }

        /* do any required cleanup */
        OBJ_RELEASE(proc_pml);
        proc->proc_pml = NULL;
    }
    return OMPI_SUCCESS;
}
/**
 * Finalize the ob1 PML component.
 *
 * NOTE(review): currently a stub (see the "FIX" marker) -- no teardown
 * of BMI resources happens here yet; cleanup lives in component_close().
 */
int mca_pml_ob1_component_fini(void)
{
    /* FIX */
    return OMPI_SUCCESS;
}

266
src/mca/pml/ob1/pml_ob1.h Обычный файл
Просмотреть файл

@ -0,0 +1,266 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_GEN2_H
#define MCA_PML_GEN2_H
#include "threads/thread.h"
#include "threads/condition.h"
#include "class/ompi_free_list.h"
#include "util/cmd_line.h"
#include "request/request.h"
#include "mca/pml/pml.h"
#include "mca/pml/base/pml_base_request.h"
#include "mca/pml/base/pml_base_bsend.h"
#include "mca/pml/base/pml_base_sendreq.h"
#include "mca/bmi/bmi.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * OB1 PML module state: the BMI modules/components selected at init
 * time, their progress callbacks, MCA parameter values, and free lists
 * of send/recv requests.
 */
struct mca_pml_ob1_t {
    mca_pml_base_module_t super;

    /* unique BMI components derived from the selected modules */
    mca_bmi_base_component_t **bmi_components;
    size_t num_bmi_components;

    /* all selected BMI modules, sorted by decreasing exclusivity
     * (see mca_pml_ob1_add_bmis) */
    mca_bmi_base_module_t** bmi_modules;
    size_t num_bmi_modules;

    /* unique progress functions across the components in use */
    mca_bmi_base_component_progress_fn_t* bmi_progress;
    size_t num_bmi_progress;

    int priority;
    int free_list_num;   /* initial size of free list */
    int free_list_max;   /* maximum size of free list */
    int free_list_inc;   /* number of elements to grow free list */
    int poll_iterations; /* number of iterations to poll for completion */

    /* lock queue access */
    ompi_mutex_t lock;

    /* free list of requests */
    ompi_free_list_t send_requests;
    ompi_free_list_t recv_requests;

    /* list of pending send requests */
    ompi_list_t send_pending;
};
typedef struct mca_pml_ob1_t mca_pml_ob1_t;
extern mca_pml_ob1_t mca_pml_ob1;
/*
* PML module functions.
*/
extern int mca_pml_ob1_component_open(void);
extern int mca_pml_ob1_component_close(void);
extern mca_pml_base_module_t* mca_pml_ob1_component_init(
int *priority,
bool enable_progress_threads,
bool enable_mpi_threads
);
extern int mca_pml_ob1_component_fini(void);
/*
* PML interface functions.
*/
extern int mca_pml_ob1_add_comm(
struct ompi_communicator_t* comm
);
extern int mca_pml_ob1_del_comm(
struct ompi_communicator_t* comm
);
extern int mca_pml_ob1_add_procs(
struct ompi_proc_t **procs,
size_t nprocs
);
extern int mca_pml_ob1_del_procs(
struct ompi_proc_t **procs,
size_t nprocs
);
extern int mca_pml_ob1_add_bmis(void);
extern int mca_pml_ob1_enable(
bool enable
);
extern int mca_pml_ob1_progress(void);
extern int mca_pml_ob1_iprobe(
int dst,
int tag,
struct ompi_communicator_t* comm,
int *matched,
ompi_status_public_t* status
);
extern int mca_pml_ob1_probe(
int dst,
int tag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status
);
extern int mca_pml_ob1_isend_init(
void *buf,
size_t count,
ompi_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
struct ompi_request_t **request
);
extern int mca_pml_ob1_isend(
void *buf,
size_t count,
ompi_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
struct ompi_request_t **request
);
extern int mca_pml_ob1_send(
void *buf,
size_t count,
ompi_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm
);
extern int mca_pml_ob1_irecv_init(
void *buf,
size_t count,
ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
struct ompi_request_t **request
);
extern int mca_pml_ob1_irecv(
void *buf,
size_t count,
ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
struct ompi_request_t **request
);
extern int mca_pml_ob1_recv(
void *buf,
size_t count,
ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status
);
extern void mca_pml_ob1_recv_callback(
mca_bmi_base_module_t* bmi,
mca_bmi_base_tag_t tag,
mca_bmi_base_descriptor_t* descriptor,
void* cbdata
);
extern int mca_pml_ob1_progress(void);
extern int mca_pml_ob1_start(
size_t count,
ompi_request_t** requests
);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
/**
 * Complete a request from the PML's point of view: persistent requests
 * become INACTIVE unless the user already freed them; non-persistent
 * requests are returned via MCA_PML_GEN2_FREE.
 *
 * Wrapped in do { } while (0) so the multi-statement macro expands
 * safely inside unbraced if/else bodies (the original used bare braces).
 */
#define MCA_PML_GEN2_FINI(request)                                                   \
do {                                                                                 \
    mca_pml_base_request_t* pml_request = *(mca_pml_base_request_t**)(request);      \
    if(pml_request->req_persistent) {                                                \
        if(pml_request->req_free_called) {                                           \
            MCA_PML_GEN2_FREE(request);                                              \
        } else {                                                                     \
            pml_request->req_ompi.req_state = OMPI_REQUEST_INACTIVE;                 \
        }                                                                            \
    } else {                                                                         \
        MCA_PML_GEN2_FREE(request);                                                  \
    }                                                                                \
} while (0)
/**
 * Mark a request freed by the user and, if the PML side is already
 * complete, return it to the matching free list (buffered sends are
 * finalized first).  Always nulls out the caller's request handle.
 *
 * Wrapped in do { } while (0) so the multi-statement macro expands
 * safely inside unbraced if/else bodies (the original used bare braces).
 */
#define MCA_PML_GEN2_FREE(request)                                                   \
do {                                                                                 \
    mca_pml_base_request_t* pml_request = *(mca_pml_base_request_t**)(request);      \
    pml_request->req_free_called = true;                                             \
    if( pml_request->req_pml_complete == true)                                       \
    {                                                                                \
        switch(pml_request->req_type) {                                              \
        case MCA_PML_REQUEST_SEND:                                                   \
            {                                                                        \
            mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)pml_request; \
            if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) {      \
                mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq);           \
            }                                                                        \
            MCA_PML_GEN2_SEND_REQUEST_RETURN(sendreq);                               \
            break;                                                                   \
            }                                                                        \
        case MCA_PML_REQUEST_RECV:                                                   \
            {                                                                        \
            mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)pml_request; \
            MCA_PML_GEN2_RECV_REQUEST_RETURN(recvreq);                               \
            break;                                                                   \
            }                                                                        \
        default:                                                                     \
            break;                                                                   \
        }                                                                            \
    }                                                                                \
    *(request) = MPI_REQUEST_NULL;                                                   \
} while (0)
#endif

94
src/mca/pml/ob1/pml_ob1_comm.c Обычный файл
Просмотреть файл

@ -0,0 +1,94 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "pml_ob1.h"
#include "pml_ob1_comm.h"
/* Constructor: per-peer matching state starts at sequence 1 with empty
 * fragment/receive queues. */
static void mca_pml_ob1_comm_proc_construct(mca_pml_ob1_comm_proc_t* proc)
{
    proc->expected_sequence = 1;
    OBJ_CONSTRUCT(&proc->frags_cant_match, ompi_list_t);
    OBJ_CONSTRUCT(&proc->specific_receives, ompi_list_t);
    OBJ_CONSTRUCT(&proc->unexpected_frags, ompi_list_t);
}
/* Destructor: tear down the three per-peer queues constructed above.
 * NOTE(review): any entries still queued are not drained here --
 * presumably the queues are empty by destruction time; confirm. */
static void mca_pml_ob1_comm_proc_destruct(mca_pml_ob1_comm_proc_t* proc)
{
    OBJ_DESTRUCT(&proc->frags_cant_match);
    OBJ_DESTRUCT(&proc->specific_receives);
    OBJ_DESTRUCT(&proc->unexpected_frags);
}
static OBJ_CLASS_INSTANCE(
mca_pml_ob1_comm_proc_t,
ompi_object_t,
mca_pml_ob1_comm_proc_construct,
mca_pml_ob1_comm_proc_destruct);
/* Constructor: empty communicator matching state; the procs array is
 * allocated later by mca_pml_ob1_comm_init_size(). */
static void mca_pml_ob1_comm_construct(mca_pml_ob1_comm_t* comm)
{
    OBJ_CONSTRUCT(&comm->wild_receives, ompi_list_t);
    OBJ_CONSTRUCT(&comm->matching_lock, ompi_mutex_t);
    comm->recv_sequence = 0;
    comm->procs = NULL;
    comm->num_procs = 0;
}
/* Destructor: destruct each per-peer proc object, free the array, and
 * tear down the communicator-wide queue/lock.  Relies on num_procs
 * being 0 whenever procs is NULL (guaranteed by the constructor and
 * init_size). */
static void mca_pml_ob1_comm_destruct(mca_pml_ob1_comm_t* comm)
{
    size_t i;
    for(i=0; i<comm->num_procs; i++)
        OBJ_DESTRUCT((&comm->procs[i]));
    if(NULL != comm->procs)
        free(comm->procs);
    OBJ_DESTRUCT(&comm->wild_receives);
    OBJ_DESTRUCT(&comm->matching_lock);
}
OBJ_CLASS_INSTANCE(
mca_pml_ob1_comm_t,
ompi_object_t,
mca_pml_ob1_comm_construct,
mca_pml_ob1_comm_destruct);
/**
 * Size the communicator matching state: allocate and construct one
 * mca_pml_ob1_comm_proc_t per rank.
 *
 * @param comm  Communicator state to size.
 * @param size  Number of ranks in the communicator.
 * @return      OMPI_SUCCESS or OMPI_ERR_OUT_OF_RESOURCE.
 */
int mca_pml_ob1_comm_init_size(mca_pml_ob1_comm_t* comm, size_t size)
{
    size_t i;

    /* send message sequence-number support - sender side */
    /* calloc checks size * sizeof() for overflow, unlike the original
     * bare malloc(sizeof(...) * size) */
    comm->procs = calloc(size, sizeof(mca_pml_ob1_comm_proc_t));
    if(NULL == comm->procs) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    for(i=0; i<size; i++) {
        OBJ_CONSTRUCT(comm->procs+i, mca_pml_ob1_comm_proc_t);
    }
    comm->num_procs = size;
    return OMPI_SUCCESS;
}

72
src/mca/pml/ob1/pml_ob1_comm.h Обычный файл
Просмотреть файл

@ -0,0 +1,72 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_GEN2_COMM_H
#define MCA_PML_GEN2_COMM_H
#include "threads/mutex.h"
#include "threads/condition.h"
#include "mca/ptl/ptl.h"
#include "class/ompi_list.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/* Per-peer matching state cached on the communicator. */
struct mca_pml_ob1_comm_proc_t {
    ompi_object_t super;
    uint16_t expected_sequence;    /**< send message sequence number - receiver side */
    ompi_list_t frags_cant_match;  /**< out-of-order fragment queues */
    ompi_list_t specific_receives; /**< queues of unmatched specific receives */
    ompi_list_t unexpected_frags;  /**< unexpected fragment queues */
};
typedef struct mca_pml_ob1_comm_proc_t mca_pml_ob1_comm_proc_t;
/**
 * Cached on ompi_communicator_t to hold queues/state
 * used by the PML<->BMI interface for matching logic.
 *
 * NOTE(review): this still uses mca_ptl_sequence_t (and the file still
 * includes mca/ptl/ptl.h) even though this commit is removing the ptl
 * dependency -- a BMI-local sequence type should replace it; confirm.
 */
struct mca_pml_comm_t {
    ompi_object_t super;
    mca_ptl_sequence_t recv_sequence; /**< recv request sequence number - receiver side */
    ompi_mutex_t matching_lock;       /**< matching lock */
    ompi_list_t wild_receives;        /**< queue of unmatched wild (source process not specified) receives */
    mca_pml_ob1_comm_proc_t* procs;   /* per-rank matching state, sized by init_size() */
    size_t num_procs;
};
typedef struct mca_pml_comm_t mca_pml_ob1_comm_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_t);
/**
* Initialize an instance of mca_pml_ob1_comm_t based on the communicator size.
*
* @param comm Instance of mca_pml_ob1_comm_t
* @param size Size of communicator
* @return OMPI_SUCCESS or error status on failure.
*/
OMPI_DECLSPEC extern int mca_pml_ob1_comm_init_size(mca_pml_ob1_comm_t* comm, size_t size);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

191
src/mca/pml/ob1/pml_ob1_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,191 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "include/sys/cache.h"
#include "event/event.h"
#include "mpi.h"
#include "mca/pml/pml.h"
#include "mca/bmi/bmi.h"
#include "mca/bmi/base/base.h"
#include "mca/base/mca_base_param.h"
#include "mca/pml/base/pml_base_bsend.h"
#include "pml_ob1.h"
#include "pml_ob1_proc.h"
#include "pml_ob1_hdr.h"
#include "pml_ob1_sendreq.h"
#include "pml_ob1_recvreq.h"
/* Public component descriptor: registers the "ob1" PML with the MCA
 * framework and wires up its open/close/init/fini entry points. */
mca_pml_base_component_1_0_0_t mca_pml_ob1_component = {

    /* First, the mca_base_component_t struct containing meta
       information about the component itself */

    {
        /* Indicate that we are a pml v1.0.0 component (which also implies
           a specific MCA version) */

        MCA_PML_BASE_VERSION_1_0_0,

        "ob1", /* MCA component name */
        1,  /* MCA component major version */
        0,  /* MCA component minor version */
        0,  /* MCA component release version */
        mca_pml_ob1_component_open,  /* component open */
        mca_pml_ob1_component_close  /* component close */
    },

    /* Next the MCA v1.0.0 component meta data */

    {
        /* Whether the component is checkpointable or not */
        false
    },

    mca_pml_ob1_component_init,  /* component init */
    mca_pml_ob1_component_fini   /* component finalize */
};
/*
 * Register an integer MCA parameter under the "pml ob1" scope and
 * return its effective (possibly user-overridden) value.
 */
static inline int mca_pml_ob1_param_register_int(
    const char* param_name,
    int default_value)
{
    int value = default_value;
    int id = mca_base_param_register_int("pml","ob1",param_name,NULL,default_value);
    mca_base_param_lookup_int(id, &value);
    return value;
}
/**
 * MCA component open: construct global module state, register the MCA
 * parameters, and open the BMI framework.  Returns the result of
 * mca_bmi_base_open().
 */
int mca_pml_ob1_component_open(void)
{
    OBJ_CONSTRUCT(&mca_pml_ob1.lock, ompi_mutex_t);
    OBJ_CONSTRUCT(&mca_pml_ob1.send_requests, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_pml_ob1.recv_requests, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_pml_ob1.send_pending, ompi_list_t);

    /* the BMI arrays are allocated later, in mca_pml_ob1_add_bmis() */
    mca_pml_ob1.bmi_components = NULL;
    mca_pml_ob1.num_bmi_components = 0;
    mca_pml_ob1.bmi_modules = NULL;
    mca_pml_ob1.num_bmi_modules = 0;
    mca_pml_ob1.bmi_progress = NULL;
    mca_pml_ob1.num_bmi_progress = 0;

    mca_pml_ob1.free_list_num =
        mca_pml_ob1_param_register_int("free_list_num", 256);
    mca_pml_ob1.free_list_max =
        mca_pml_ob1_param_register_int("free_list_max", -1);
    mca_pml_ob1.free_list_inc =
        mca_pml_ob1_param_register_int("free_list_inc", 256);
    mca_pml_ob1.poll_iterations =
        mca_pml_ob1_param_register_int("poll_iterations", 100000);
    mca_pml_ob1.priority =
        mca_pml_ob1_param_register_int("priority", 0);

    return mca_bmi_base_open();
}
/**
 * MCA component close: close the BMI framework, report any leaked
 * requests (debug builds), free the BMI arrays, and destruct global
 * state.  Mirrors component_open().
 */
int mca_pml_ob1_component_close(void)
{
    int rc;
    if(OMPI_SUCCESS != (rc = mca_bmi_base_close()))
        return rc;

#if OMPI_ENABLE_DEBUG
    /* leak report: allocated vs. returned request counts.
     * NOTE(review): "%d" assumes these counters are int-sized --
     * confirm against the ompi_free_list_t/ompi_list_t field types. */
    if (mca_pml_ob1.send_requests.fl_num_allocated !=
        mca_pml_ob1.send_requests.super.ompi_list_length) {
        ompi_output(0, "ob1 send requests: %d allocated %d returned\n",
            mca_pml_ob1.send_requests.fl_num_allocated,
            mca_pml_ob1.send_requests.super.ompi_list_length);
    }
    if (mca_pml_ob1.recv_requests.fl_num_allocated !=
        mca_pml_ob1.recv_requests.super.ompi_list_length) {
        ompi_output(0, "ob1 recv requests: %d allocated %d returned\n",
            mca_pml_ob1.recv_requests.fl_num_allocated,
            mca_pml_ob1.recv_requests.super.ompi_list_length);
    }
#endif

    /* arrays allocated by mca_pml_ob1_add_bmis() */
    if(NULL != mca_pml_ob1.bmi_components) {
        free(mca_pml_ob1.bmi_components);
    }
    if(NULL != mca_pml_ob1.bmi_modules) {
        free(mca_pml_ob1.bmi_modules);
    }
    if(NULL != mca_pml_ob1.bmi_progress) {
        free(mca_pml_ob1.bmi_progress);
    }
    OBJ_DESTRUCT(&mca_pml_ob1.send_pending);
    OBJ_DESTRUCT(&mca_pml_ob1.send_requests);
    OBJ_DESTRUCT(&mca_pml_ob1.recv_requests);
    OBJ_DESTRUCT(&mca_pml_ob1.lock);
    return OMPI_SUCCESS;
}
/**
 * MCA component init: size the request free lists, initialize buffered
 * sends, publish this process's architecture via the modex, and select
 * and wire up the BMIs.  Returns the PML module on success, NULL on
 * any failure.
 *
 * NOTE(review): the two ompi_free_list_init() return values are not
 * checked -- confirm whether failure here should also return NULL.
 */
mca_pml_base_module_t* mca_pml_ob1_component_init(int* priority,
                                                  bool enable_progress_threads,
                                                  bool enable_mpi_threads)
{
    uint32_t proc_arch;
    int rc;

    *priority = mca_pml_ob1.priority;

    /* requests */
    ompi_free_list_init(
        &mca_pml_ob1.send_requests,
        sizeof(mca_pml_ob1_send_request_t),
        OBJ_CLASS(mca_pml_ob1_send_request_t),
        mca_pml_ob1.free_list_num,
        mca_pml_ob1.free_list_max,
        mca_pml_ob1.free_list_inc,
        NULL);

    ompi_free_list_init(
        &mca_pml_ob1.recv_requests,
        sizeof(mca_pml_ob1_recv_request_t),
        OBJ_CLASS(mca_pml_ob1_recv_request_t),
        mca_pml_ob1.free_list_num,
        mca_pml_ob1.free_list_max,
        mca_pml_ob1.free_list_inc,
        NULL);

    /* buffered send */
    if(OMPI_SUCCESS != mca_pml_base_bsend_init(enable_mpi_threads)) {
        ompi_output(0, "mca_pml_ob1_component_init: mca_pml_bsend_init failed\n");
        return NULL;
    }

    /* post this processes datatype (architecture, in network byte order) */
    proc_arch = ompi_proc_local()->proc_arch;
    proc_arch = htonl(proc_arch);
    rc = mca_base_modex_send(&mca_pml_ob1_component.pmlm_version, &proc_arch, sizeof(proc_arch));
    if(rc != OMPI_SUCCESS)
        return NULL;

    /* initialize BMIs */
    if(OMPI_SUCCESS != mca_bmi_base_select(enable_progress_threads,enable_mpi_threads))
        return NULL;
    if(OMPI_SUCCESS != mca_pml_ob1_add_bmis())
        return NULL;
    return &mca_pml_ob1.super;
}

27
src/mca/pml/ob1/pml_ob1_component.h Обычный файл
Просмотреть файл

@ -0,0 +1,27 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_GEN2_COMPONENT_H
#define MCA_PML_GEN2_COMPONENT_H
/*
* PML module functions.
*/
OMPI_COMP_EXPORT extern mca_pml_base_component_1_0_0_t mca_pml_ob1_component;
#endif

61
src/mca/pml/ob1/pml_ob1_endpoint.c Обычный файл
Просмотреть файл

@ -0,0 +1,61 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "mca/pml/pml.h"
#include "pml_ob1_endpoint.h"
/* Constructor: empty endpoint array with no storage reserved. */
static void mca_pml_ob1_ep_array_construct(mca_pml_ob1_ep_array_t* array)
{
    array->arr_endpoints = NULL;
    array->arr_size = 0;
    array->arr_index = 0;
    array->arr_reserve = 0;
}
/* Destructor: release the endpoint storage.  free(NULL) is a no-op,
 * so the original NULL guard was redundant and has been dropped. */
static void mca_pml_ob1_ep_array_destruct(mca_pml_ob1_ep_array_t* array)
{
    free(array->arr_endpoints);
}
OBJ_CLASS_INSTANCE(
mca_pml_ob1_ep_array_t,
ompi_object_t,
mca_pml_ob1_ep_array_construct,
mca_pml_ob1_ep_array_destruct
);
/**
 * Grow the endpoint array's reserved capacity to at least 'size'
 * entries, zero-filling the newly added region.  No-op when the
 * current reserve already suffices.
 *
 * @param array  Endpoint array to grow.
 * @param size   Requested capacity (in entries).
 * @return       OMPI_SUCCESS or OMPI_ERR_OUT_OF_RESOURCE.
 */
int mca_pml_ob1_ep_array_reserve(mca_pml_ob1_ep_array_t* array, size_t size)
{
    size_t old_len = sizeof(mca_pml_ob1_endpoint_t)*array->arr_reserve;
    size_t new_len = sizeof(mca_pml_ob1_endpoint_t)*size;
    mca_pml_ob1_endpoint_t* endpoints;

    if(old_len >= new_len)
        return OMPI_SUCCESS;

    /* FIX: use a temporary so the original allocation is not leaked
     * when realloc() fails (the old code overwrote arr_endpoints
     * directly with realloc's return value) */
    endpoints = realloc(array->arr_endpoints, new_len);
    if(NULL == endpoints)
        return OMPI_ERR_OUT_OF_RESOURCE;
    array->arr_endpoints = endpoints;
    memset((unsigned char*)array->arr_endpoints + old_len, 0, new_len-old_len);
    array->arr_reserve = size;
    return OMPI_SUCCESS;
}

155
src/mca/pml/ob1/pml_ob1_endpoint.h Обычный файл
Просмотреть файл

@ -0,0 +1,155 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_GEN2_ENDPOINT_H
#define MCA_PML_GEN2_ENDPOINT_H
#include "util/output.h"
#include "mca/bmi/bmi.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * A data structure associated with a ompi_proc_t that caches
 * addressing/scheduling attributes for a specific BMI instance
 * that can be used to reach the process.
 */
struct mca_pml_ob1_endpoint_t {
    int bmi_weight;                     /**< BMI weight for scheduling */
    size_t bmi_eager_limit;             /**< BMI eager limit */
    struct mca_bmi_base_module_t *bmi;  /**< BMI module */
    struct mca_bmi_base_endpoint_t* bmi_endpoint; /**< BMI addressing info */
    struct mca_bmi_base_descriptor_t* bmi_cache;  /**< cached descriptor (set NULL at add_procs) */

    /* BMI function table, copied from the module in
     * mca_pml_ob1_add_procs() */
    mca_bmi_base_module_alloc_fn_t bmi_alloc;
    mca_bmi_base_module_free_fn_t bmi_free;
    mca_bmi_base_module_send_fn_t bmi_send;
    mca_bmi_base_module_put_fn_t bmi_put;
    mca_bmi_base_module_get_fn_t bmi_get;
};
typedef struct mca_pml_ob1_endpoint_t mca_pml_ob1_endpoint_t;
/**
 * A dynamically growable array of mca_pml_ob1_endpoint_t instances.
 * Maintains an index into the array that is used for round-robin
 * scheduling across contents.
 */
struct mca_pml_ob1_ep_array_t {
    ompi_object_t super;
    size_t arr_size;    /**< number available */
    size_t arr_reserve; /**< size of allocated bmi_proc array */
    size_t arr_index;   /**< last used index */
    mca_pml_ob1_endpoint_t* arr_endpoints;   /**< array of bmi endpoints */
};
typedef struct mca_pml_ob1_ep_array_t mca_pml_ob1_ep_array_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_ob1_ep_array_t);
/**
 * If required, reallocate (grow) the array to the indicated size.
 *
 * @param array (IN)
 * @param size (IN)
 */
int mca_pml_ob1_ep_array_reserve(mca_pml_ob1_ep_array_t*, size_t);

/** Number of endpoints currently in use (not the reserved capacity). */
static inline size_t mca_pml_ob1_ep_array_get_size(mca_pml_ob1_ep_array_t* array)
{
    return array->arr_size;
}
/**
 * Grow the array if required, and set the size.
 *
 * @param array (IN)
 * @param size (IN)
 */
static inline void mca_pml_ob1_ep_array_set_size(mca_pml_ob1_ep_array_t* array, size_t size)
{
    /* FIX: compare the REQUESTED size against the reserve; the original
     * compared arr_size, which could leave arr_size > arr_reserve
     * without ever growing the storage */
    if(size > array->arr_reserve)
        mca_pml_ob1_ep_array_reserve(array, size);
    array->arr_size = size;
}
/**
 * Grow the array size by one and return the item at that index.
 * Debug builds report (and reject) an insert beyond the reserve.
 *
 * @param array (IN)
 */
static inline mca_pml_ob1_endpoint_t* mca_pml_ob1_ep_array_insert(mca_pml_ob1_ep_array_t* array)
{
#if OMPI_ENABLE_DEBUG
    if(array->arr_size >= array->arr_reserve) {
        /* FIX: size_t passed for %d is undefined behavior in varargs;
         * cast to unsigned long and print with %lu */
        ompi_output(0, "mca_pml_ob1_ep_array_insert: invalid array index %lu >= %lu",
            (unsigned long)array->arr_size, (unsigned long)array->arr_reserve);
        return 0;
    }
#endif
    return &array->arr_endpoints[array->arr_size++];
}
/**
 * Return an array item at the specified index.
 * Debug builds report (and reject) an out-of-range index.
 *
 * @param array (IN)
 * @param index (IN)
 */
static inline mca_pml_ob1_endpoint_t* mca_pml_ob1_ep_array_get_index(mca_pml_ob1_ep_array_t* array, size_t index)
{
#if OMPI_ENABLE_DEBUG
    if(index >= array->arr_size) {
        /* FIX: size_t passed for %d is undefined behavior in varargs;
         * cast to unsigned long and print with %lu */
        ompi_output(0, "mca_pml_ob1_ep_array_get_index: invalid array index %lu >= %lu",
            (unsigned long)index, (unsigned long)array->arr_size);
        return 0;
    }
#endif
    return &array->arr_endpoints[index];
}
/**
 * Return the next endpoint in round-robin (LRU) order, advancing and
 * wrapping the internal cursor.
 *
 * @param array (IN)
 */
static inline mca_pml_ob1_endpoint_t* mca_pml_ob1_ep_array_get_next(mca_pml_ob1_ep_array_t* array)
{
    mca_pml_ob1_endpoint_t* next;
#if OMPI_ENABLE_DEBUG
    if(array->arr_size == 0) {
        ompi_output(0, "mca_pml_ob1_ep_array_get_next: invalid array size");
        return 0;
    }
#endif
    next = array->arr_endpoints + array->arr_index;
    /* wrap with modulo instead of the explicit compare-and-reset */
    array->arr_index = (array->arr_index + 1) % array->arr_size;
    return next;
}
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

223
src/mca/pml/ob1/pml_ob1_hdr.h Обычный файл
Просмотреть файл

@ -0,0 +1,223 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_GEN2_HEADER_H
#define MCA_PML_GEN2_HEADER_H
#include "ompi_config.h"
#include "mca/ptl/ptl.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#define MCA_PML_GEN2_HDR_TYPE_MATCH 1
#define MCA_PML_GEN2_HDR_TYPE_RNDV 2
#define MCA_PML_GEN2_HDR_TYPE_FRAG 3
#define MCA_PML_GEN2_HDR_TYPE_ACK 4
#define MCA_PML_GEN2_HDR_TYPE_NACK 5
#define MCA_PML_GEN2_HDR_TYPE_GET 6
#define MCA_PML_GEN2_HDR_TYPE_FIN 7
#define MCA_PML_GEN2_HDR_TYPE_MAX 8
#define MCA_PML_GEN2_HDR_FLAGS_ACK 1 /* is an ack required */
#define MCA_PML_GEN2_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */
/*
 * Convert a 64 bit value to network byte order.  Big-endian hosts are
 * already in network order; otherwise swap the two 32-bit halves with
 * htonl() and recombine.
 */
static inline uint64_t hton64(uint64_t val)
{
    /* platform already in network byte order? */
    if(htonl(1) == 1L)
        return val;
    return ((uint64_t)htonl((uint32_t)(val & 0xffffffffU)) << 32) |
           (uint64_t)htonl((uint32_t)(val >> 32));
}
/*
 * Convert a 64 bit value from network to host byte order.  Big-endian
 * hosts need no conversion; otherwise swap the two 32-bit halves with
 * ntohl() and recombine.
 */
static inline uint64_t ntoh64(uint64_t val)
{
    /* platform already in network byte order? */
    if(htonl(1) == 1L)
        return val;
    return ((uint64_t)ntohl((uint32_t)(val & 0xffffffffU)) << 32) |
           (uint64_t)ntohl((uint32_t)(val >> 32));
}
/**
 * Common hdr attributes - must be first element in each hdr type
 */
struct mca_pml_ob1_common_hdr_t {
    uint8_t hdr_type;  /**< type of envelope (MCA_PML_GEN2_HDR_TYPE_*) */
    uint8_t hdr_flags; /**< flags indicating how fragment should be processed
                            (MCA_PML_GEN2_HDR_FLAGS_*) */
};
typedef struct mca_pml_ob1_common_hdr_t mca_pml_ob1_common_hdr_t;

/* single-byte fields need no byte-order conversion, hence no-op macros */
#define MCA_PML_GEN2_COMMON_HDR_NTOH(h)
#define MCA_PML_GEN2_COMMON_HDR_HTON(h)
/**
 * Header definition for the first fragment, contains the
 * attributes required to match the corresponding posted receive.
 */
struct mca_pml_ob1_match_hdr_t {
    mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
    uint16_t hdr_contextid;              /**< communicator index */
    int32_t hdr_src;                     /**< source rank */
    int32_t hdr_dst;                     /**< destination rank */
    int32_t hdr_tag;                     /**< user tag */
    uint64_t hdr_msg_length;             /**< message length */
    uint16_t hdr_msg_seq;                /**< message sequence number */
};
typedef struct mca_pml_ob1_match_hdr_t mca_pml_ob1_match_hdr_t;

/* Byte-swap every multi-byte field of a match header (the common
 * header is single bytes, so its macro is a no-op). */
#define MCA_PML_GEN2_MATCH_HDR_NTOH(h) \
do { \
    MCA_PML_GEN2_COMMON_HDR_NTOH((h).hdr_common); \
    (h).hdr_contextid = ntohs((h).hdr_contextid); \
    (h).hdr_src = ntohl((h).hdr_src); \
    (h).hdr_dst = ntohl((h).hdr_dst); \
    (h).hdr_tag = ntohl((h).hdr_tag); \
    (h).hdr_msg_length = ntoh64((h).hdr_msg_length); \
    (h).hdr_msg_seq = ntohs((h).hdr_msg_seq); \
} while (0)

#define MCA_PML_GEN2_MATCH_HDR_HTON(h) \
do { \
    MCA_PML_GEN2_COMMON_HDR_HTON((h).hdr_common); \
    (h).hdr_contextid = htons((h).hdr_contextid); \
    (h).hdr_src = htonl((h).hdr_src); \
    (h).hdr_dst = htonl((h).hdr_dst); \
    (h).hdr_tag = htonl((h).hdr_tag); \
    (h).hdr_msg_length = hton64((h).hdr_msg_length); \
    (h).hdr_msg_seq = htons((h).hdr_msg_seq); \
} while (0)
/**
 * Header definition for the first fragment when an acknowledgment
 * is required. This could be the first fragment of a large message
 * or a short message that requires an ack (synchronous).
 */
struct mca_pml_ob1_rendezvous_hdr_t {
    mca_pml_ob1_match_hdr_t hdr_match;  /**< match attributes (must be first) */
    uint64_t hdr_frag_length;           /**< fragment length */
    ompi_ptr_t hdr_src_ptr;             /**< pointer to source fragment - returned in ack */
};
typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;

/* Note: hdr_src_ptr is deliberately not byte-swapped - it is opaque to
 * the receiver and is only echoed back to the sender in the ack. */
#define MCA_PML_GEN2_RNDV_HDR_NTOH(h) \
    do { \
        MCA_PML_GEN2_MATCH_HDR_NTOH((h).hdr_match); \
        (h).hdr_frag_length = ntoh64((h).hdr_frag_length); \
    } while (0)

#define MCA_PML_GEN2_RNDV_HDR_HTON(h) \
    do { \
        MCA_PML_GEN2_MATCH_HDR_HTON((h).hdr_match); \
        (h).hdr_frag_length = hton64((h).hdr_frag_length); \
    } while (0)
/**
 * Header for subsequent fragments.
 */
struct mca_pml_ob1_frag_hdr_t {
    mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
    uint64_t hdr_frag_length;   /**< fragment length */
    uint64_t hdr_frag_offset;   /**< offset into message */
    ompi_ptr_t hdr_src_ptr;     /**< pointer to source fragment */
    ompi_ptr_t hdr_dst_ptr;     /**< pointer to matched receive */
};
typedef struct mca_pml_ob1_frag_hdr_t mca_pml_ob1_frag_hdr_t;

/* The two ompi_ptr_t fields are opaque cookies exchanged between the
 * peers and are not byte-swapped. */
#define MCA_PML_GEN2_FRAG_HDR_NTOH(h) \
    do { \
        MCA_PML_GEN2_COMMON_HDR_NTOH((h).hdr_common); \
        (h).hdr_frag_length = ntoh64((h).hdr_frag_length); \
        (h).hdr_frag_offset = ntoh64((h).hdr_frag_offset); \
    } while (0)

#define MCA_PML_GEN2_FRAG_HDR_HTON(h) \
    do { \
        MCA_PML_GEN2_COMMON_HDR_HTON((h).hdr_common); \
        (h).hdr_frag_length = hton64((h).hdr_frag_length); \
        (h).hdr_frag_offset = hton64((h).hdr_frag_offset); \
    } while (0)
/**
 * Header used to acknowledgment outstanding fragment(s).
 */
struct mca_pml_ob1_ack_hdr_t {
    mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
    ompi_ptr_t hdr_src_ptr;   /**< source fragment */
    ompi_ptr_t hdr_dst_match; /**< matched receive request */
    ompi_ptr_t hdr_dst_addr;  /**< posted receive buffer */
    uint64_t hdr_dst_size;    /**< size of posted buffer */
    /* sequence range? */
};
typedef struct mca_pml_ob1_ack_hdr_t mca_pml_ob1_ack_hdr_t;

/* Fix: the macro argument is now parenthesized as (h).hdr_common,
 * consistent with every other *_HDR_NTOH macro in this file; the
 * unparenthesized form would mis-expand if invoked with a non-trivial
 * expression (CERT PRE01-C).  ompi_ptr_t cookies are not swapped. */
#define MCA_PML_GEN2_ACK_HDR_NTOH(h) \
    do { \
        MCA_PML_GEN2_COMMON_HDR_NTOH((h).hdr_common); \
        (h).hdr_dst_size = ntoh64((h).hdr_dst_size); \
    } while (0)

#define MCA_PML_GEN2_ACK_HDR_HTON(h) \
    do { \
        MCA_PML_GEN2_COMMON_HDR_HTON((h).hdr_common); \
        (h).hdr_dst_size = hton64((h).hdr_dst_size); \
    } while (0)
/**
 * Union of defined hdr types.  hdr_common is valid in every view;
 * its hdr_type field selects which of the other members to read.
 */
union mca_pml_ob1_hdr_t {
    mca_pml_ob1_common_hdr_t hdr_common;   /**< always readable */
    mca_pml_ob1_match_hdr_t hdr_match;     /**< first fragment */
    mca_pml_ob1_rendezvous_hdr_t hdr_rndv; /**< first fragment needing an ack */
    mca_pml_ob1_frag_hdr_t hdr_frag;       /**< subsequent fragments */
    mca_pml_ob1_ack_hdr_t hdr_ack;         /**< acknowledgment */
};
typedef union mca_pml_ob1_hdr_t mca_pml_ob1_hdr_t;
#endif

88
src/mca/pml/ob1/pml_ob1_iprobe.c Обычный файл
Просмотреть файл

@ -0,0 +1,88 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "request/request.h"
#include "pml_ob1_recvreq.h"
/**
 * Non-blocking probe.  Posts a temporary IPROBE-type receive request;
 * if an incoming message already matches it, *matched is set to 1 and
 * (optionally) *status is filled in.  Otherwise *matched is 0 and the
 * progress engine is ticked once.  The message itself is never consumed.
 *
 * @param src     source rank (or wildcard)
 * @param tag     user tag (or wildcard)
 * @param comm    communicator
 * @param matched (OUT) 1 if a matching message was found, else 0
 * @param status  (OUT, optional) status of the matched message
 * @return OMPI_SUCCESS
 */
int mca_pml_ob1_iprobe(int src,
                       int tag,
                       struct ompi_communicator_t *comm,
                       int *matched, ompi_status_public_t * status)
{
    mca_pml_ob1_recv_request_t recvreq;

    /* build a stack-allocated probe request; probes carry no user buffer */
    OBJ_CONSTRUCT( &recvreq, mca_pml_ob1_recv_request_t );
    recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML;
    recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_IPROBE;

    MCA_PML_GEN2_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char, src, tag, comm, true);
    MCA_PML_GEN2_RECV_REQUEST_START(&recvreq);

    if( recvreq.req_recv.req_base.req_ompi.req_complete == true ) {
        if( NULL != status ) {
            *status = recvreq.req_recv.req_base.req_ompi.req_status;
        }
        *matched = 1;
    } else {
        *matched = 0;
        /* no match yet - advance outstanding communication once */
        ompi_progress();
    }
    MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv );

    /* bug fix: the original declared "int rc" and returned it without
     * ever assigning it - an uninitialized (undefined) return value */
    return OMPI_SUCCESS;
}
/**
 * Blocking probe.  Posts a PROBE-type receive request and blocks until
 * an incoming message matches it.  The request carries no user buffer,
 * so the message is left queued for a subsequent receive; only the
 * status is returned.
 */
int mca_pml_ob1_probe(int src,
                      int tag,
                      struct ompi_communicator_t *comm,
                      ompi_status_public_t * status)
{
    /* stack-allocated request - freed via FINI before return */
    mca_pml_ob1_recv_request_t recvreq;
    OBJ_CONSTRUCT( &recvreq, mca_pml_ob1_recv_request_t );
    recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML;
    recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_PROBE;

    MCA_PML_GEN2_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char, src, tag, comm, true);
    MCA_PML_GEN2_RECV_REQUEST_START(&recvreq);

    if (recvreq.req_recv.req_base.req_ompi.req_complete == false) {
        /* give up and sleep until completion */
        if (ompi_using_threads()) {
            ompi_mutex_lock(&ompi_request_lock);
            ompi_request_waiting++;
            while (recvreq.req_recv.req_base.req_ompi.req_complete == false)
                ompi_condition_wait(&ompi_request_cond, &ompi_request_lock);
            ompi_request_waiting--;
            ompi_mutex_unlock(&ompi_request_lock);
        } else {
            /* NOTE(review): this branch waits on the condition variable
             * without taking ompi_request_lock - presumably valid only
             * under the single-threaded progress model; confirm */
            ompi_request_waiting++;
            while (recvreq.req_recv.req_base.req_ompi.req_complete == false)
                ompi_condition_wait(&ompi_request_cond, &ompi_request_lock);
            ompi_request_waiting--;
        }
    }

    if (NULL != status) {
        *status = recvreq.req_recv.req_base.req_ompi.req_status;
    }
    MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv );
    return OMPI_SUCCESS;
}

112
src/mca/pml/ob1/pml_ob1_irecv.c Обычный файл
Просмотреть файл

@ -0,0 +1,112 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "request/request.h"
#include "pml_ob1_recvreq.h"
/**
 * Create a persistent receive request: allocate and initialize the
 * request, but do not start it (MPI_Recv_init semantics).
 */
int mca_pml_ob1_irecv_init(void *addr,
                           size_t count,
                           ompi_datatype_t * datatype,
                           int src,
                           int tag,
                           struct ompi_communicator_t *comm,
                           struct ompi_request_t **request)
{
    mca_pml_ob1_recv_request_t *req = NULL;
    int status;

    /* grab a request descriptor from the free list */
    MCA_PML_GEN2_RECV_REQUEST_ALLOC(req, status);
    if (NULL == req) {
        return status;
    }

    /* persistent == true: caller starts it later via MPI_Start */
    MCA_PML_GEN2_RECV_REQUEST_INIT(req, addr, count, datatype,
                                   src, tag, comm, true);

    *request = (ompi_request_t *) req;
    return OMPI_SUCCESS;
}
/**
 * Non-blocking receive: allocate, initialize (non-persistent) and
 * immediately start the request; return it to the caller.
 */
int mca_pml_ob1_irecv(void *addr,
                      size_t count,
                      ompi_datatype_t * datatype,
                      int src,
                      int tag,
                      struct ompi_communicator_t *comm,
                      struct ompi_request_t **request)
{
    mca_pml_ob1_recv_request_t *req = NULL;
    int status;

    MCA_PML_GEN2_RECV_REQUEST_ALLOC(req, status);
    if (NULL == req) {
        return status;
    }

    MCA_PML_GEN2_RECV_REQUEST_INIT(req, addr, count, datatype,
                                   src, tag, comm, false);
    MCA_PML_GEN2_RECV_REQUEST_START(req);

    *request = (ompi_request_t *) req;
    return OMPI_SUCCESS;
}
/**
 * Blocking receive: start a non-persistent receive request, wait for
 * completion, copy out the status, and return the request to the pool.
 * Returns the MPI error code recorded in the request's status.
 */
int mca_pml_ob1_recv(void *addr,
                     size_t count,
                     ompi_datatype_t * datatype,
                     int src,
                     int tag,
                     struct ompi_communicator_t *comm,
                     ompi_status_public_t * status)
{
    int rc;
    mca_pml_ob1_recv_request_t *recvreq;
    MCA_PML_GEN2_RECV_REQUEST_ALLOC(recvreq, rc);
    if (NULL == recvreq)
        return rc;

    MCA_PML_GEN2_RECV_REQUEST_INIT(recvreq,
                                   addr,
                                   count, datatype, src, tag, comm, false);
    MCA_PML_GEN2_RECV_REQUEST_START(recvreq);

    if (recvreq->req_recv.req_base.req_ompi.req_complete == false) {
        /* give up and sleep until completion */
        if (ompi_using_threads()) {
            ompi_mutex_lock(&ompi_request_lock);
            ompi_request_waiting++;
            while (recvreq->req_recv.req_base.req_ompi.req_complete == false)
                ompi_condition_wait(&ompi_request_cond, &ompi_request_lock);
            ompi_request_waiting--;
            ompi_mutex_unlock(&ompi_request_lock);
        } else {
            /* NOTE(review): condition wait without holding
             * ompi_request_lock - presumably relies on single-threaded
             * progress; confirm */
            ompi_request_waiting++;
            while (recvreq->req_recv.req_base.req_ompi.req_complete == false)
                ompi_condition_wait(&ompi_request_cond, &ompi_request_lock);
            ompi_request_waiting--;
        }
    }

    if (NULL != status) { /* return status */
        *status = recvreq->req_recv.req_base.req_ompi.req_status;
    }
    /* propagate the per-request MPI error code to the caller */
    rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
    OBJ_RELEASE(recvreq);
    return rc;
}

129
src/mca/pml/ob1/pml_ob1_isend.c Обычный файл
Просмотреть файл

@ -0,0 +1,129 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "pml_ob1.h"
#include "pml_ob1_proc.h"
#include "pml_ob1_sendreq.h"
#include "pml_ob1_recvreq.h"
/**
 * Create a persistent send request: allocate and initialize the
 * request without starting it (MPI_Send_init semantics).
 */
int mca_pml_ob1_isend_init(void *buf,
                           size_t count,
                           ompi_datatype_t * datatype,
                           int dst,
                           int tag,
                           mca_pml_base_send_mode_t sendmode,
                           ompi_communicator_t * comm,
                           ompi_request_t ** request)
{
    mca_pml_ob1_send_request_t *req = NULL;
    int status;

    /* grab a send request cached on the destination's endpoint */
    MCA_PML_GEN2_SEND_REQUEST_ALLOC(comm, dst, req, status);
    if (status != OMPI_SUCCESS) {
        return status;
    }

    /* persistent == true: the caller starts it later */
    MCA_PML_GEN2_SEND_REQUEST_INIT(req,
                                   buf,
                                   count,
                                   datatype,
                                   dst, tag,
                                   comm, sendmode, true);

    *request = (ompi_request_t *) req;
    return OMPI_SUCCESS;
}
/**
 * Non-blocking send: allocate, initialize (non-persistent) and start
 * the request; hand it back to the caller for later completion.
 */
int mca_pml_ob1_isend(void *buf,
                      size_t count,
                      ompi_datatype_t * datatype,
                      int dst,
                      int tag,
                      mca_pml_base_send_mode_t sendmode,
                      ompi_communicator_t * comm,
                      ompi_request_t ** request)
{
    mca_pml_ob1_send_request_t *req = NULL;
    int status;

    MCA_PML_GEN2_SEND_REQUEST_ALLOC(comm, dst, req, status);
    if (status != OMPI_SUCCESS) {
        return status;
    }

    MCA_PML_GEN2_SEND_REQUEST_INIT(req,
                                   buf,
                                   count,
                                   datatype,
                                   dst, tag,
                                   comm, sendmode, false);
    MCA_PML_GEN2_SEND_REQUEST_START(req, status);

    *request = (ompi_request_t *) req;
    /* status reflects whether the start succeeded */
    return status;
}
/**
 * Blocking send: start a non-persistent send request, wait for
 * completion, and return the request to the pool.  On a failed start
 * the request is freed and the error propagated.
 */
int mca_pml_ob1_send(void *buf,
                     size_t count,
                     ompi_datatype_t * datatype,
                     int dst,
                     int tag,
                     mca_pml_base_send_mode_t sendmode,
                     ompi_communicator_t * comm)
{
    int rc;
    mca_pml_ob1_send_request_t *sendreq;
    MCA_PML_GEN2_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc);
    if (rc != OMPI_SUCCESS)
        return rc;

    MCA_PML_GEN2_SEND_REQUEST_INIT(sendreq,
                                   buf,
                                   count,
                                   datatype,
                                   dst, tag,
                                   comm, sendmode, false);
    MCA_PML_GEN2_SEND_REQUEST_START(sendreq, rc);
    if (rc != OMPI_SUCCESS) {
        /* failed start: release the request before reporting the error */
        MCA_PML_GEN2_FREE((ompi_request_t **) & sendreq);
        return rc;
    }

    if (sendreq->req_send.req_base.req_ompi.req_complete == false) {
        /* give up and sleep until completion */
        if (ompi_using_threads()) {
            ompi_mutex_lock(&ompi_request_lock);
            ompi_request_waiting++;
            while (sendreq->req_send.req_base.req_ompi.req_complete == false)
                ompi_condition_wait(&ompi_request_cond, &ompi_request_lock);
            ompi_request_waiting--;
            ompi_mutex_unlock(&ompi_request_lock);
        } else {
            /* NOTE(review): condition wait without holding
             * ompi_request_lock - presumably relies on single-threaded
             * progress; confirm */
            ompi_request_waiting++;
            while (sendreq->req_send.req_base.req_ompi.req_complete == false)
                ompi_condition_wait(&ompi_request_cond, &ompi_request_lock);
            ompi_request_waiting--;
        }
    }

    /* return request to pool */
    MCA_PML_GEN2_FREE((ompi_request_t **) & sendreq);
    return OMPI_SUCCESS;
}

592
src/mca/pml/ob1/pml_ob1_match.c Обычный файл
Просмотреть файл

@ -0,0 +1,592 @@
/** @file */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include "class/ompi_list.h"
#include "threads/mutex.h"
#include "include/constants.h"
#include "communicator/communicator.h"
#include "pml_ob1.h"
#include "pml_ob1_comm.h"
#include "pml_ob1_recvfrag.h"
#include "pml_ob1_recvreq.h"
#include "pml_ob1_hdr.h"
#include "pml_ob1_match.h"
/**
 * Try and match the incoming message fragment to the list of
 * "wild" receives (receives posted with OMPI_ANY_SOURCE).
 *
 * @param hdr Matching data from received fragment (IN)
 *
 * @param comm Pointer to the communicator structure used for
 * matching purposes. (IN)
 *
 * @param return_match Set to the matched receive (and that receive is
 * unlinked from the wild list); left unchanged if no match is found.
 *
 * This routine assumes that the appropriate matching locks are
 * set by the upper level routine.  Written as a macro (not a
 * function) so the same body can be specialized in several callers
 * without call overhead - note every line must end in a backslash.
 */
#define MCA_PML_GEN2_CHECK_WILD_RECEIVES_FOR_MATCH(hdr,comm,proc,return_match) \
do { \
    /* local parameters */ \
    ompi_list_t* wild_receives = &comm->wild_receives; \
    mca_pml_ob1_recv_request_t *wild_recv; \
    int frag_tag,recv_tag; \
 \
    /* initialization */ \
    frag_tag=hdr->hdr_tag; \
 \
    /* \
     * Loop over the wild irecvs - no need to lock, the upper level \
     * locking is protecting from having other threads trying to \
     * change this list. \
     */ \
    for(wild_recv = (mca_pml_ob1_recv_request_t *) \
            ompi_list_get_first(wild_receives); \
            wild_recv != (mca_pml_ob1_recv_request_t *) \
            ompi_list_get_end(wild_receives); \
            wild_recv = (mca_pml_ob1_recv_request_t *) \
            ((ompi_list_item_t *)wild_recv)->ompi_list_next) { \
 \
        recv_tag = wild_recv->req_recv.req_base.req_tag; \
        if ( \
            /* exact tag match */ \
            (frag_tag == recv_tag) || \
            /* wild tag match - negative tags (except for \
             * OMPI_ANY_TAG) are reserved for internal use, and will \
             * not be matched with OMPI_ANY_TAG */ \
            ( (recv_tag == OMPI_ANY_TAG) && (0 <= frag_tag) ) ) \
 \
        { \
            /* \
             * Mark that this is the matching irecv, and go to process it. \
             */ \
            return_match = wild_recv; \
 \
            /* remove this irecv from the postd wild ireceive list */ \
            ompi_list_remove_item(wild_receives, \
                                  (ompi_list_item_t *)wild_recv); \
 \
            /* found match - no need to continue */ \
            break; \
        } \
    } \
} while(0)
/**
 * Try and match the incoming message fragment to the list of
 * "specific" receives (receives posted against this exact source).
 *
 * @param hdr Matching data from received fragment (IN)
 *
 * @param comm Pointer to the communicator structure used for
 * matching purposes. (IN)
 *
 * @param return_match Set to the matched receive (which is unlinked
 * from the specific list); left unchanged if no match is found.
 *
 * This routine assumes that the appropriate matching locks are
 * set by the upper level routine.
 */
#define MCA_PML_GEN2_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(hdr,comm,proc,return_match) \
do { \
    /* local variables */ \
    ompi_list_t* specific_receives = &proc->specific_receives; \
    mca_pml_ob1_recv_request_t *specific_recv; \
    int recv_tag,frag_tag; \
 \
    /* initialization */ \
    frag_tag=hdr->hdr_tag; \
 \
    /* \
     * Loop over the specific irecvs. \
     */ \
    for(specific_recv = (mca_pml_ob1_recv_request_t *) \
            ompi_list_get_first(specific_receives); \
            specific_recv != (mca_pml_ob1_recv_request_t *) \
            ompi_list_get_end(specific_receives); \
            specific_recv = (mca_pml_ob1_recv_request_t *) \
            ((ompi_list_item_t *)specific_recv)->ompi_list_next) { \
        /* \
         * Check for a match \
         */ \
        recv_tag = specific_recv->req_recv.req_base.req_tag; \
        if ( (frag_tag == recv_tag) || \
             ( (recv_tag == OMPI_ANY_TAG) && (0 <= frag_tag) ) ) { \
 \
            /* \
             * Match made \
             */ \
            return_match = specific_recv; \
 \
            /* remove descriptor from posted specific ireceive list */ \
            ompi_list_remove_item(specific_receives, \
                                  (ompi_list_item_t *)specific_recv); \
 \
            break; \
        } \
    } \
} while(0)
/**
 * Try and match the incoming message fragment to the list of
 * "wild" receives and "specific" receives. Used when both types
 * of receives have been posted, i.e. when we need to coordinate
 * between multiple lists to make sure ordered delivery occurs.
 * The two lists are merged on the fly by comparing per-request
 * sequence numbers: the earlier-posted receive is always tried first,
 * preserving MPI's posted-receive ordering guarantee.
 *
 * @param hdr Matching data from received fragment (IN)
 *
 * @param comm Pointer to the communicator structure used for
 * matching purposes. (IN)
 *
 * @param return_match Set to the matched receive, if any.
 *
 * This routine assumes that the appropriate matching locks are
 * set by the upper level routine.
 *
 * NOTE(review): the sequence variables are declared as
 * mca_ptl_sequence_t even though this commit removes the PTL
 * dependency elsewhere - presumably a leftover; confirm the intended
 * BMI-era type.
 */
#define MCA_PML_GEN2_CHECK_SPECIFIC_AND_WILD_RECEIVES_FOR_MATCH( \
    hdr,comm,proc,return_match) \
do { \
    /* local variables */ \
    mca_pml_ob1_recv_request_t *specific_recv, *wild_recv; \
    mca_ptl_sequence_t wild_recv_seq, specific_recv_seq; \
    int frag_tag, wild_recv_tag, specific_recv_tag; \
 \
    /* initialization */ \
    frag_tag=hdr->hdr_tag; \
 \
    /* \
     * We know that when this is called, both specific and wild irecvs \
     * have been posted. \
     */ \
    specific_recv = (mca_pml_ob1_recv_request_t *) \
        ompi_list_get_first(&(proc)->specific_receives); \
    wild_recv = (mca_pml_ob1_recv_request_t *) \
        ompi_list_get_first(&comm->wild_receives); \
 \
    specific_recv_seq = specific_recv->req_recv.req_base.req_sequence; \
    wild_recv_seq = wild_recv->req_recv.req_base.req_sequence; \
 \
    while (true) { \
        if (wild_recv_seq < specific_recv_seq) { \
            /* \
             * wild recv is earlier than the specific one. \
             */ \
            /* \
             * try and match \
             */ \
            wild_recv_tag = wild_recv->req_recv.req_base.req_tag; \
            if ( (frag_tag == wild_recv_tag) || \
                 ( (wild_recv_tag == OMPI_ANY_TAG) && (0 <= frag_tag) ) ) { \
                /* \
                 * Match made \
                 */ \
                return_match=wild_recv; \
 \
                /* remove this recv from the wild receive queue */ \
                ompi_list_remove_item(&comm->wild_receives, \
                                      (ompi_list_item_t *)wild_recv); \
                break; \
            } \
 \
            /* \
             * No match, go to the next. \
             */ \
            wild_recv=(mca_pml_ob1_recv_request_t *) \
                ((ompi_list_item_t *)wild_recv)->ompi_list_next; \
 \
            /* \
             * If that was the last wild one, just look at the \
             * rest of the specific ones. \
             */ \
            if (wild_recv == (mca_pml_ob1_recv_request_t *) \
                    ompi_list_get_end(&comm->wild_receives) ) \
            { \
                MCA_PML_GEN2_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(hdr, comm, proc, return_match); \
                break; \
            } \
 \
            /* \
             * Get the sequence number for this recv, and go \
             * back to the top of the loop. \
             */ \
            wild_recv_seq = wild_recv->req_recv.req_base.req_sequence; \
 \
        } else { \
            /* \
             * specific recv is earlier than the wild one. \
             */ \
            specific_recv_tag=specific_recv->req_recv.req_base.req_tag; \
            if ( (frag_tag == specific_recv_tag) || \
                 ( (specific_recv_tag == OMPI_ANY_TAG) && (0<=frag_tag)) ) \
            { \
                /* \
                 * Match made \
                 */ \
                return_match = specific_recv; \
                /* remove descriptor from specific receive list */ \
                ompi_list_remove_item(&(proc)->specific_receives, \
                                      (ompi_list_item_t *)specific_recv); \
                break; \
            } \
 \
            /* \
             * No match, go on to the next specific irecv. \
             */ \
            specific_recv = (mca_pml_ob1_recv_request_t *) \
                ((ompi_list_item_t *)specific_recv)->ompi_list_next; \
 \
            /* \
             * If that was the last specific irecv, process the \
             * rest of the wild ones. \
             */ \
            if (specific_recv == (mca_pml_ob1_recv_request_t *) \
                    ompi_list_get_end(&(proc)->specific_receives)) \
            { \
                MCA_PML_GEN2_CHECK_WILD_RECEIVES_FOR_MATCH(hdr, comm, proc, return_match); \
                break; \
            } \
            /* \
             * Get the sequence number for this recv, and go \
             * back to the top of the loop. \
             */ \
            specific_recv_seq = specific_recv->req_recv.req_base.req_sequence; \
        } \
    } \
} while(0)
/*
* Specialized matching routines for internal use only.
*/
static bool mca_pml_ob1_check_cantmatch_for_match(
ompi_list_t *additional_matches,
mca_pml_ob1_comm_t* comm,
mca_pml_ob1_comm_proc_t *proc);
/**
 * RCS/CTS receive side matching
 *
 * @param bmi          (IN) BMI module the fragment arrived on
 * @param hdr          (IN) match header of the received fragment
 * @param segments     (IN) data segments of the fragment
 * @param num_segments (IN) number of entries in segments
 *
 * @return OMPI error code
 *
 * This routine is used to try and match a newly arrived message fragment
 * to pre-posted receives. The following assumptions are made
 * - fragments are received out of order
 * - for long messages, e.g. more than one fragment, a RTS/CTS algorithm
 *   is used.
 * - 2nd and greater fragments include a receive descriptor pointer
 * - fragments may be dropped
 * - fragments may be corrupt
 * - this routine may be called simultaneously by more than one thread
 */
int mca_pml_ob1_match(
    mca_bmi_base_module_t* bmi,
    mca_pml_ob1_match_hdr_t *hdr,
    mca_bmi_base_segment_t* segments,
    size_t num_segments)
{
    /* local variables */
    uint16_t next_msg_seq_expected, frag_msg_seq;
    ompi_communicator_t *comm_ptr;
    mca_pml_ob1_recv_request_t *matched_receive = NULL;
    mca_pml_ob1_comm_t *comm;
    mca_pml_ob1_comm_proc_t *proc;
    bool additional_match = false;
    ompi_list_t additional_matches;
    int rc;

    /* communicator pointer */
    comm_ptr = ompi_comm_lookup(hdr->hdr_contextid);
    comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm;

    /* source sequence number */
    frag_msg_seq = hdr->hdr_msg_seq;
    proc = comm->procs + hdr->hdr_src;

    /* get next expected message sequence number - if threaded
     * run, lock to make sure that if another thread is processing
     * a frag from the same message a match is made only once.
     * Also, this prevents other posted receives (for a pair of
     * end points) from being processed, and potentially "loosing"
     * the fragment.
     */
    OMPI_THREAD_LOCK(&comm->matching_lock);

    /* get sequence number of next message that can be processed */
    next_msg_seq_expected = (uint16_t)proc->expected_sequence;
    if (frag_msg_seq == next_msg_seq_expected) {

        /*
         * This is the sequence number we were expecting,
         * so we can try matching it to already posted
         * receives.
         */

        /* We're now expecting the next sequence number. */
        (proc->expected_sequence)++;

        /*
         * figure out what sort of matching logic to use, if need to
         * look only at "specific" receives, or "wild" receives,
         * or if we need to traverse both sets at the same time.
         */
        if (ompi_list_get_size(&proc->specific_receives) == 0 ){
            /*
             * There are only wild irecvs, so specialize the algorithm.
             */
            MCA_PML_GEN2_CHECK_WILD_RECEIVES_FOR_MATCH(hdr, comm, proc, matched_receive);
        } else if (ompi_list_get_size(&comm->wild_receives) == 0 ) {
            /*
             * There are only specific irecvs, so specialize the algorithm.
             */
            MCA_PML_GEN2_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(hdr, comm, proc, matched_receive);
        } else {
            /*
             * There are some of each.
             */
            MCA_PML_GEN2_CHECK_SPECIFIC_AND_WILD_RECEIVES_FOR_MATCH(hdr, comm, proc, matched_receive);
        }

        /* if match found, process data */
        if (matched_receive) {

            /* set length of incoming message */
            matched_receive->req_recv.req_bytes_packed = hdr->hdr_msg_length;

            /*
             * update delivered sequence number information, if needed.
             */
            if( (matched_receive->req_recv.req_base.req_type == MCA_PML_REQUEST_PROBE) ) {
                /* Match a probe, rollback the next expected sequence number */
                (proc->expected_sequence)--;
            }
        } else {

            /* if no match found, place on unexpected queue */
            mca_pml_ob1_recv_frag_t* frag;
            MCA_PML_GEN2_RECV_FRAG_ALLOC(frag, rc);
            if(OMPI_SUCCESS != rc) {
                /* bug fix: was &pml_comm->matching_lock - "pml_comm" is
                 * not declared in this function; the lock taken above
                 * is comm->matching_lock */
                OMPI_THREAD_UNLOCK(&comm->matching_lock);
                return rc;
            }
            MCA_PML_GEN2_RECV_FRAG_INIT(frag,bmi,hdr,segments,num_segments);
            ompi_list_append( &proc->unexpected_frags, (ompi_list_item_t *)frag );
        }

        /*
         * Now that new message has arrived, check to see if
         * any fragments on the c_c_frags_cant_match list
         * may now be used to form new matchs
         */
        if (0 < ompi_list_get_size(&proc->frags_cant_match)) {
            additional_match = mca_pml_ob1_check_cantmatch_for_match(&additional_matches,comm,proc);
        }

    } else {
        /*
         * This message comes after the next expected, so it
         * is ahead of sequence. Save it for later.
         */
        mca_pml_ob1_recv_frag_t* frag;
        MCA_PML_GEN2_RECV_FRAG_ALLOC(frag, rc);
        if(OMPI_SUCCESS != rc) {
            /* bug fix: was &pml_comm->matching_lock (undeclared) */
            OMPI_THREAD_UNLOCK(&comm->matching_lock);
            return rc;
        }
        MCA_PML_GEN2_RECV_FRAG_INIT(frag,bmi,hdr,segments,num_segments);
        ompi_list_append(&proc->frags_cant_match, (ompi_list_item_t *)frag);
    }

    /* bug fix: was &pml_comm->matching_lock (undeclared) */
    OMPI_THREAD_UNLOCK(&comm->matching_lock);

    /* release matching lock before processing fragment */
    if(matched_receive != NULL) {
        mca_pml_ob1_recv_request_progress(matched_receive,bmi,segments,num_segments);
    } else {
        /* NOTE(review): fires for every unexpected/out-of-sequence
         * fragment - presumably a bring-up debug trace; consider removing */
        ompi_output(0, "match not found\n");
    }

    if(additional_match) {
        ompi_list_item_t* item;
        while(NULL != (item = ompi_list_remove_first(&additional_matches))) {
#if 0
            mca_pml_ob1_recv_frag_t* frag = (mca_pml_ob1_recv_frag_t*)item;
            mca_pml_ob1_recv_request_progress(frag->request,frag->bmi,frag->segments,frag->num_segments);
            MCA_PML_GEN2_RECV_FRAG_RETURN(frag);
#endif
        }
    }
    return OMPI_SUCCESS;
}
/**
 * Scan the list of frags that came in ahead of time to see if any
 * can be processed at this time.  If they can, try and match the
 * frags.
 *
 * @param additional_matches List to hold new matches with fragments
 * from the c_frags_cant_match list. (IN/OUT)  NOTE: it is only
 * OBJ_CONSTRUCTed here, lazily, on the first match - the caller must
 * use it only when this function returns true.
 *
 * @param comm Pointer to the communicator structure used for
 * matching purposes. (IN)
 *
 * @param proc Per-peer state holding frags_cant_match and the
 * expected sequence counter. (IN)
 *
 * @return true if at least one previously out-of-order fragment was
 * matched to a posted receive.
 *
 * This routine assumes that the appropriate matching locks are
 * set by the upper level routine.
 */
static bool mca_pml_ob1_check_cantmatch_for_match(
    ompi_list_t *additional_matches,
    mca_pml_ob1_comm_t* comm,
    mca_pml_ob1_comm_proc_t *proc)
{
    /* local parameters */
    int match_found;                /* restart flag: 1 while progress is being made */
    uint16_t next_msg_seq_expected, frag_seq;
    mca_pml_ob1_recv_frag_t *frag_desc;
    mca_pml_ob1_recv_request_t *matched_receive = NULL;
    bool match_made = false;

    /*
     * Loop over all the out of sequence messages. No ordering is assumed
     * in the c_frags_cant_match list.  Each time a fragment with the
     * expected sequence number is consumed, the scan restarts from the
     * head because the (unordered) list may now hold the new expected
     * sequence number.
     */
    match_found = 1;
    while ((0 < ompi_list_get_size(&proc->frags_cant_match)) && match_found) {

        /* initialize match flag for this search */
        match_found = 0;

        /* get sequence number of next message that can be processed */
        next_msg_seq_expected = proc->expected_sequence;

        /* search the list for a fragment from the send with sequence
         * number next_msg_seq_expected
         */
        for(frag_desc = (mca_pml_ob1_recv_frag_t *)
                ompi_list_get_first(&proc->frags_cant_match);
            frag_desc != (mca_pml_ob1_recv_frag_t *)
                ompi_list_get_end(&proc->frags_cant_match);
            frag_desc = (mca_pml_ob1_recv_frag_t *)
                ompi_list_get_next(frag_desc))
        {
            /*
             * If the message has the next expected seq from that proc...
             */
            frag_seq = frag_desc->hdr.hdr_match.hdr_msg_seq;
            if (frag_seq == next_msg_seq_expected) {
                mca_pml_ob1_match_hdr_t* hdr = &frag_desc->hdr.hdr_match;

                /* We're now expecting the next sequence number. */
                (proc->expected_sequence)++;

                /* signal that match was made */
                match_found = 1;

                /*
                 * remove frag_desc from list
                 */
                ompi_list_remove_item(&proc->frags_cant_match,
                                      (ompi_list_item_t *)frag_desc);

                /*
                 * figure out what sort of matching logic to use, if need to
                 * look only at "specific" receives, or "wild" receives,
                 * or if we need to traverse both sets at the same time.
                 */
                proc = comm->procs + hdr->hdr_src;
                if (ompi_list_get_size(&proc->specific_receives) == 0 ) {
                    /*
                     * There are only wild irecvs, so specialize the algorithm.
                     */
                    MCA_PML_GEN2_CHECK_WILD_RECEIVES_FOR_MATCH(hdr, comm, proc, matched_receive);
                } else if (ompi_list_get_size(&comm->wild_receives) == 0 ) {
                    /*
                     * There are only specific irecvs, so specialize the algorithm.
                     */
                    MCA_PML_GEN2_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(hdr, comm, proc, matched_receive);
                } else {
                    /*
                     * There are some of each.
                     */
                    MCA_PML_GEN2_CHECK_SPECIFIC_AND_WILD_RECEIVES_FOR_MATCH(hdr, comm, proc, matched_receive);
                }

                /* if match found, process data */
                if (matched_receive) {

                    /* associate the receive descriptor with the fragment
                     * descriptor */
                    frag_desc->request = matched_receive;

                    /* add this fragment descriptor to the list of
                     * descriptors to be processed later
                     */
                    if(match_made == false) {
                        /* lazily construct the output list on first use */
                        match_made = true;
                        OBJ_CONSTRUCT(additional_matches, ompi_list_t);
                    }
                    ompi_list_append(additional_matches, (ompi_list_item_t *)frag_desc);

                } else {

                    /* if no match found, place on unexpected queue */
                    ompi_list_append( &proc->unexpected_frags, (ompi_list_item_t *)frag_desc);

                }

                /* c_frags_cant_match is not an ordered list, so exit loop
                 * and re-start search for next sequence number */
                break;

            } /* end if (frag_seq == next_msg_seq_expected) */

        } /* end for (frag_desc) loop */

    } /* end while loop */

    return match_made;
}

49
src/mca/pml/ob1/pml_ob1_match.h Обычный файл
Просмотреть файл

@ -0,0 +1,49 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_GEN2_MATCH_H
#define MCA_PML_GEN2_MATCH_H
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
struct mca_pml_ob1_recv_frag_t;
/**
* RCS/CTS receive side matching
* Match incoming fragments against posted receives. Out of order
* delivery.
*
* @param frag_header (IN) Header of received fragment.
* @param frag_desc (IN) Received fragment descriptor.
* @param match_made (OUT) Flag indicating wether a match was made.
* @param additional_matches (OUT) List of additional matches
* @return OMPI_SUCCESS or error status on failure.
*/
OMPI_DECLSPEC int mca_pml_ob1_match(
mca_bmi_base_module_t* bmi,
mca_pml_ob1_match_hdr_t *hdr,
mca_bmi_base_segment_t* segments,
size_t num_segments);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_PML_GEN2_MATCH_H */

48
src/mca/pml/ob1/pml_ob1_proc.c Обычный файл
Просмотреть файл

@ -0,0 +1,48 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "include/sys/atomic.h"
#include "pml_ob1.h"
#include "pml_ob1_proc.h"
/* Class constructor: put a freshly allocated per-peer descriptor into
 * a known-empty state. */
static void mca_pml_ob1_proc_construct(mca_pml_ob1_proc_t* proc)
{
    /* no back-pointer yet, and the send sequence starts at zero */
    proc->proc_sequence = 0;
    proc->proc_ompi = NULL;

    /* set up the lock and the two per-peer endpoint arrays */
    OBJ_CONSTRUCT(&proc->proc_lock, ompi_mutex_t);
    OBJ_CONSTRUCT(&proc->bmi_first, mca_pml_ob1_ep_array_t);
    OBJ_CONSTRUCT(&proc->bmi_next, mca_pml_ob1_ep_array_t);
}
/* Class destructor: release the members created in the constructor
 * (the members are independent, so teardown order is not significant). */
static void mca_pml_ob1_proc_destruct(mca_pml_ob1_proc_t* proc)
{
    OBJ_DESTRUCT(&proc->bmi_next);
    OBJ_DESTRUCT(&proc->bmi_first);
    OBJ_DESTRUCT(&proc->proc_lock);
}
OBJ_CLASS_INSTANCE(
mca_pml_ob1_proc_t,
ompi_object_t,
mca_pml_ob1_proc_construct,
mca_pml_ob1_proc_destruct
);

104
src/mca/pml/ob1/pml_ob1_proc.h Обычный файл
Просмотреть файл

@ -0,0 +1,104 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_PROC_H
#define MCA_PML_PROC_H
#include "threads/mutex.h"
#include "communicator/communicator.h"
#include "group/group.h"
#include "proc/proc.h"
#include "pml_ob1_endpoint.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * Structure associated w/ ompi_proc_t that contains data specific
 * to the PML.
 *
 * NOTE(review): the struct tag is mca_pml_proc_t but the typedef is
 * mca_pml_ob1_proc_t - presumably a rename in progress; confirm the
 * intended component-scoped name.
 */
struct mca_pml_proc_t {
    ompi_object_t super;              /* OBJ base - must be first */
    ompi_proc_t *proc_ompi;           /**< back-pointer to ompi_proc_t */
    ompi_mutex_t proc_lock;           /**< lock to protect against concurrent access */
    volatile uint32_t proc_sequence;  /**< sequence number for send */
    mca_pml_ob1_ep_array_t bmi_first; /**< array of endpoints to use for first fragments */
    mca_pml_ob1_ep_array_t bmi_next;  /**< array of endpoints to use for remaining fragments */
};
typedef struct mca_pml_proc_t mca_pml_ob1_proc_t;
OMPI_COMP_EXPORT OBJ_CLASS_DECLARATION(mca_pml_ob1_proc_t);
/**
* Return the mca_pml_proc_t instance cached in the communicators local group.
*
* @param comm Communicator
* @param rank Peer rank
* @return mca_pml_proc_t instance
*/
static inline mca_pml_ob1_proc_t* mca_pml_ob1_proc_lookup_local(ompi_communicator_t* comm, int rank)
{
    /* the PML proc is cached on the ompi_proc_t held by the local group */
    ompi_proc_t* local_proc = comm->c_local_group->grp_proc_pointers[rank];
    return local_proc->proc_pml;
}
/**
* Return the mca_pml_proc_t instance cached on the communicators remote group.
*
* @param comm Communicator
* @param rank Peer rank
* @return mca_pml_proc_t instance
*/
static inline mca_pml_ob1_proc_t* mca_pml_ob1_proc_lookup_remote(ompi_communicator_t* comm, int rank)
{
    /* remote-group procs are cached directly on the communicator */
    mca_pml_ob1_proc_t* remote_proc = comm->c_pml_procs[rank];
    return remote_proc;
}
/**
* Return the mca_bmi_peer_t instance corresponding to the process/bmi combination.
*
* @param comm Communicator
* @param rank Peer rank
* @return mca_pml_proc_t instance
*/
static inline struct mca_bmi_base_endpoint_t* mca_pml_ob1_proc_lookup_remote_endpoint(
    ompi_communicator_t* comm,
    int rank,
    struct mca_bmi_base_module_t* bmi)
{
    mca_pml_ob1_proc_t* proc = comm->c_pml_procs[rank];
    size_t num_endpoints = mca_pml_ob1_ep_array_get_size(&proc->bmi_first);
    size_t index;

    /* linear scan of the "first fragment" endpoint array for the entry
     * created over the requested BMI module */
    for (index = 0; index < num_endpoints; index++) {
        mca_pml_ob1_endpoint_t* ep = proc->bmi_first.arr_endpoints + index;
        if (ep->bmi == bmi) {
            return ep->bmi_endpoint;
        }
    }
    return NULL;
}
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

39
src/mca/pml/ob1/pml_ob1_progress.c Обычный файл
Просмотреть файл

@ -0,0 +1,39 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "pml_ob1.h"
#include "pml_ob1_sendreq.h"
int mca_pml_ob1_progress(void)
{
    int count = 0;
    size_t index;

    /*
     * Poll each registered BMI progress function and accumulate the
     * number of completions reported.
     */
    for (index = 0; index < mca_pml_ob1.num_bmi_progress; index++) {
        int completed = mca_pml_ob1.bmi_progress[index]();
        if (completed > 0) {
            count += completed;
        }
    }
    return count;
}

64
src/mca/pml/ob1/pml_ob1_recvfrag.c Обычный файл
Просмотреть файл

@ -0,0 +1,64 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#include "ompi_config.h"
#include "mca/pml/pml.h"
#include "pml_ob1_recvfrag.h"
#include "pml_ob1_recvreq.h"
#include "pml_ob1_proc.h"
#include "pml_ob1_match.h"
/* Receive fragments carry no state needing construction/destruction,
 * so the class is registered with NULL construct/destruct hooks. */
OBJ_CLASS_INSTANCE(
    mca_pml_ob1_recv_frag_t,
    ompi_list_item_t,
    NULL,
    NULL
);
/* BMI receive callback: validates the incoming descriptor and routes
 * first fragments (eager match or rendezvous) into the matching logic. */
void mca_pml_ob1_recv_callback(
    mca_bmi_base_module_t* bmi,
    mca_bmi_base_tag_t tag,
    mca_bmi_base_descriptor_t* des,
    void* cbdata)
{
    mca_bmi_base_segment_t* segments = des->des_src;
    mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;

    /* drop anything too short to carry even the common header */
    if (segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) {
        return;
    }

    switch (hdr->hdr_common.hdr_type) {
        case MCA_PML_GEN2_HDR_TYPE_MATCH:
            /* fallthrough - both eager and rendezvous first fragments
             * go through the same matching path */
        case MCA_PML_GEN2_HDR_TYPE_RNDV:
            mca_pml_ob1_match(bmi, &hdr->hdr_match, segments, des->des_src_cnt);
            break;
        default:
            /* unknown header types are ignored */
            break;
    }
}

76
src/mca/pml/ob1/pml_ob1_recvfrag.h Обычный файл
Просмотреть файл

@ -0,0 +1,76 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_GEN2_RECVFRAG_H
#define MCA_PML_GEN2_RECVFRAG_H
#include "mca/bmi/bmi.h"
#include "pml_ob1_hdr.h"
/**
 * A received first fragment: the BMI it arrived on, a copy of the
 * matching header, its data segments, and (once matched, see
 * mca_pml_ob1_recv_request_match_specific_proc) the posted receive.
 */
struct mca_pml_ob1_recv_frag_t {
    ompi_list_item_t super;                     /* allows the frag to sit on unexpected-frag lists */
    mca_bmi_base_module_t* bmi;                 /* BMI module the fragment arrived on */
    mca_pml_ob1_hdr_t hdr;                      /* matching header */
    mca_bmi_base_segment_t* segments;           /* payload segments */
    size_t num_segments;                        /* number of entries in segments */
    struct mca_pml_ob1_recv_request_t* request; /* matched receive request, if any */
};
typedef struct mca_pml_ob1_recv_frag_t mca_pml_ob1_recv_frag_t;
/* NOTE(review): the three fragment-lifecycle macros below are empty
 * placeholders - allocation, initialization and return of receive
 * fragments is not implemented yet in this checkpoint. */
#define MCA_PML_GEN2_RECV_FRAG_ALLOC(frag,rc) \
{ \
 \
}

#define MCA_PML_GEN2_RECV_FRAG_INIT(frag,bmi,hdr,segs,cnt) \
{ \
 \
}

#define MCA_PML_GEN2_RECV_FRAG_RETURN(frag) \
{ \
 \
}
/**
 * Called to attempt a match for new fragments.
 *
 * NOTE(review): the previous doc listed bmi/hdr parameters that do not
 * appear in the prototype; the fragment itself carries both.
 *
 * @param frag (IN) Receive fragment descriptor.
 * @return whether the fragment was matched (presumably; confirm against
 *         the implementation once it exists).
 */
bool mca_pml_ob1_recv_frag_match(
    struct mca_pml_ob1_recv_frag_t* frag
);

/** Invoked once a fragment has been matched to a posted receive. */
int mca_pml_ob1_recv_frag_matched(
    struct mca_pml_ob1_recv_frag_t* frag
);

/** Completion processing for a received fragment. */
int mca_pml_ob1_recv_frag_complete(
    struct mca_bmi_base_module_t* bmi,
    struct mca_pml_ob1_recv_request_t* req,
    struct mca_pml_ob1_recv_frag_t* frag
);
#endif

287
src/mca/pml/ob1/pml_ob1_recvreq.c Обычный файл
Просмотреть файл

@ -0,0 +1,287 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mca/pml/pml.h"
#include "mca/bmi/bmi.h"
#include "pml_ob1_comm.h"
#include "pml_ob1_recvreq.h"
#include "pml_ob1_recvfrag.h"
#include "pml_ob1_sendreq.h"
static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc(
mca_pml_ob1_recv_request_t* request, mca_pml_ob1_comm_proc_t* proc);
/* ompi_request_t::req_fini hook - delegates to MCA_PML_GEN2_FINI. */
static int mca_pml_ob1_recv_request_fini(struct ompi_request_t** request)
{
    MCA_PML_GEN2_FINI(request);
    return OMPI_SUCCESS;
}
/* ompi_request_t::req_free hook - delegates to MCA_PML_GEN2_FREE. */
static int mca_pml_ob1_recv_request_free(struct ompi_request_t** request)
{
    MCA_PML_GEN2_FREE(request);
    return OMPI_SUCCESS;
}
/* ompi_request_t::req_cancel hook for receives.  An unmatched request is
 * unlinked from its pending-receive list under the matching lock; the
 * request is then marked cancelled and complete so test/wait return. */
static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request, int complete)
{
    mca_pml_ob1_recv_request_t* request = (mca_pml_ob1_recv_request_t*)ompi_request;
    mca_pml_ob1_comm_t* comm = request->req_recv.req_base.req_comm->c_pml_comm;

    if( true == ompi_request->req_complete ) { /* way too late to cancel this one */
        return OMPI_SUCCESS;
    }

    /* The rest should be protected behind the match logic lock */
    OMPI_THREAD_LOCK(&comm->matching_lock);
    if( OMPI_ANY_TAG == ompi_request->req_status.MPI_TAG ) { /* the match has not already been done */
        if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) {
            ompi_list_remove_item( &comm->wild_receives, (ompi_list_item_t*)request );
        } else {
            mca_pml_ob1_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer;
            ompi_list_remove_item(&proc->specific_receives, (ompi_list_item_t*)request);
        }
    }
    OMPI_THREAD_UNLOCK(&comm->matching_lock);

    OMPI_THREAD_LOCK(&ompi_request_lock);
    ompi_request->req_status._cancelled = true;
    ompi_request->req_complete = true; /* mark it as completed so all the test/wait functions
                                        * on this particular request will finish */
    /* Now we have a problem if we are in a multi-threaded environment. We should
     * broadcast the condition on the request in order to allow the other threads
     * to complete their test/wait functions.
     */
    if(ompi_request_waiting) {
        ompi_condition_broadcast(&ompi_request_cond);
    }
    OMPI_THREAD_UNLOCK(&ompi_request_lock);
    return OMPI_SUCCESS;
}
/* Constructor: tag the request as a receive and install the
 * fini/free/cancel hooks defined above. */
static void mca_pml_ob1_recv_request_construct(mca_pml_ob1_recv_request_t* request)
{
    request->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV;
    request->req_recv.req_base.req_ompi.req_fini = mca_pml_ob1_recv_request_fini;
    request->req_recv.req_base.req_ompi.req_free = mca_pml_ob1_recv_request_free;
    request->req_recv.req_base.req_ompi.req_cancel = mca_pml_ob1_recv_request_cancel;
}
/* Destructor: nothing to release beyond what the base class handles. */
static void mca_pml_ob1_recv_request_destruct(mca_pml_ob1_recv_request_t* request)
{
}

/* Register the receive-request class with its construct/destruct pair. */
OBJ_CLASS_INSTANCE(
    mca_pml_ob1_recv_request_t,
    mca_pml_base_recv_request_t,
    mca_pml_ob1_recv_request_construct,
    mca_pml_ob1_recv_request_destruct);
/*
* Update the recv request status to reflect the number of bytes
* received and actually delivered to the application.
*/
void mca_pml_ob1_recv_request_progress(
    mca_pml_ob1_recv_request_t* req,
    mca_bmi_base_module_t* bmi,
    mca_bmi_base_segment_t* segments,
    size_t num_segments)
{
    size_t bytes_received = 0;
    size_t bytes_delivered = 0;
    mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;

    /* how many payload bytes does this fragment account for? */
    switch(hdr->hdr_common.hdr_type) {
        case MCA_PML_GEN2_HDR_TYPE_MATCH:
            /* eager: the whole message arrives in the first fragment */
            bytes_received = hdr->hdr_match.hdr_msg_length;
            break;
        case MCA_PML_GEN2_HDR_TYPE_RNDV:
            /* NOTE(review): reads hdr_frag here, while the sender fills in
             * hdr_rndv - confirm the two header layouts overlap for
             * hdr_frag_length */
            bytes_received = hdr->hdr_frag.hdr_frag_length;
            break;
        default:
            break;
    }
    /* no unpacking yet - delivered simply mirrors received */
    bytes_delivered = bytes_received;

    OMPI_THREAD_LOCK(&ompi_request_lock);
    req->req_bytes_received += bytes_received;
    req->req_bytes_delivered += bytes_delivered;
    /* all expected bytes in: mark the request complete and wake waiters */
    if (req->req_bytes_received >= req->req_recv.req_bytes_packed) {
        /* initialize request status */
        req->req_recv.req_base.req_ompi.req_status._count = req->req_bytes_delivered;
        req->req_recv.req_base.req_pml_complete = true;
        req->req_recv.req_base.req_ompi.req_complete = true;
        if(ompi_request_waiting) {
            ompi_condition_broadcast(&ompi_request_cond);
        }
    }
    OMPI_THREAD_UNLOCK(&ompi_request_lock);
}
/*
* This routine is used to match a posted receive when the source process
* is specified.
*/
void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request)
{
    mca_pml_ob1_comm_t* comm = request->req_recv.req_base.req_comm->c_pml_comm;
    mca_pml_ob1_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer;
    mca_pml_ob1_recv_frag_t* frag;

    /* check for a specific match */
    OMPI_THREAD_LOCK(&comm->matching_lock);

    /* assign sequence number */
    request->req_recv.req_base.req_sequence = comm->recv_sequence++;

    if (ompi_list_get_size(&proc->unexpected_frags) > 0 &&
        (frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
        /* matching lock is released before progressing the request */
        OMPI_THREAD_UNLOCK(&comm->matching_lock);
        mca_pml_ob1_recv_request_progress(request,frag->bmi,frag->segments,frag->num_segments);
        /* probes leave the fragment on the unexpected list; only a real
         * receive returns it */
        if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
              (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
            MCA_PML_GEN2_RECV_FRAG_RETURN(frag);
        }
        return; /* match found */
    }

    /* We didn't find any matches.  Record this irecv so we can match
     * it when the message comes in.
     */
    if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) {
        ompi_list_append(&proc->specific_receives, (ompi_list_item_t*)request);
    }
    OMPI_THREAD_UNLOCK(&comm->matching_lock);
}
/*
* this routine is used to try and match a wild posted receive - where
* wild is determined by the value assigned to the source process
*/
void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
{
    mca_pml_ob1_comm_t* comm = request->req_recv.req_base.req_comm->c_pml_comm;
    mca_pml_ob1_comm_proc_t* proc = comm->procs;
    size_t proc_count = comm->num_procs;
    size_t i;

    /*
     * Loop over all the outstanding messages to find one that matches.
     * There is an outer loop over lists of messages from each
     * process, then an inner loop over the messages from the
     * process.
     */
    /* Fix: lock the same comm->matching_lock that is released below and
     * that mca_pml_ob1_recv_request_match_specific() uses.  The previous
     * code referenced an undeclared `pml_comm` (`pml_comm->c_matching_lock`),
     * which does not exist in this scope. */
    OMPI_THREAD_LOCK(&comm->matching_lock);

    /* assign sequence number */
    request->req_recv.req_base.req_sequence = comm->recv_sequence++;

    for (i = 0; i < proc_count; i++) {
        mca_pml_ob1_recv_frag_t* frag;

        /* continue if no frags to match */
        if (ompi_list_get_size(&proc->unexpected_frags) == 0) {
            proc++;
            continue;
        }

        /* loop over messages from the current proc */
        if ((frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
            OMPI_THREAD_UNLOCK(&comm->matching_lock);
            mca_pml_ob1_recv_request_progress(request,frag->bmi,frag->segments,frag->num_segments);
            /* probes leave the fragment on the unexpected list */
            if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
                  (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
                MCA_PML_GEN2_RECV_FRAG_RETURN(frag);
            }
            return; /* match found */
        }
        proc++;
    }

    /* We didn't find any matches.  Record this irecv so we can match to
     * it when the message comes in.
     */
    if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE)
        ompi_list_append(&comm->wild_receives, (ompi_list_item_t*)request);
    OMPI_THREAD_UNLOCK(&comm->matching_lock);
}
/*
* this routine tries to match a posted receive. If a match is found,
* it places the request in the appropriate matched receive list.
*/
static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc(
    mca_pml_ob1_recv_request_t* request,
    mca_pml_ob1_comm_proc_t* proc)
{
    ompi_list_t* unexpected_frags = &proc->unexpected_frags;
    mca_pml_ob1_recv_frag_t* frag;
    mca_pml_ob1_match_hdr_t* hdr;
    int tag = request->req_recv.req_base.req_tag;

    /* two scan loops: ANY_TAG accepts the first fragment with a
     * non-negative tag; otherwise the tags must match exactly */
    if( OMPI_ANY_TAG == tag ) {
        for (frag = (mca_pml_ob1_recv_frag_t*)ompi_list_get_first(unexpected_frags);
             frag != (mca_pml_ob1_recv_frag_t*)ompi_list_get_end(unexpected_frags);
             frag = (mca_pml_ob1_recv_frag_t*)ompi_list_get_next(frag)) {
            hdr = &(frag->hdr.hdr_match);
            /* check first frag - we assume that process matching has been done already */
            if( hdr->hdr_tag >= 0 ) {
                goto find_fragment;
            }
        }
    } else {
        for (frag = (mca_pml_ob1_recv_frag_t*)ompi_list_get_first(unexpected_frags);
             frag != (mca_pml_ob1_recv_frag_t*)ompi_list_get_end(unexpected_frags);
             frag = (mca_pml_ob1_recv_frag_t*)ompi_list_get_next(frag)) {
            hdr = &(frag->hdr.hdr_match);
            /* check first frag - we assume that process matching has been done already */
            if ( tag == hdr->hdr_tag ) {
                /* we assume that the tag is correct from MPI point of view (ie. >= 0) */
                goto find_fragment;
            }
        }
    }
    return NULL;

find_fragment:
    /* fill in the request status fields from the matched header */
    request->req_recv.req_bytes_packed = hdr->hdr_msg_length;
    request->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_tag;
    request->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_src;

    /* probes must not consume the fragment */
    if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
          (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
        ompi_list_remove_item(unexpected_frags, (ompi_list_item_t*)frag);
        frag->request = request;
    }
    return frag;
}

163
src/mca/pml/ob1/pml_ob1_recvreq.h Обычный файл
Просмотреть файл

@ -0,0 +1,163 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef OMPI_PML_GEN2_RECV_REQUEST_H
#define OMPI_PML_GEN2_RECV_REQUEST_H
#include "pml_ob1.h"
#include "pml_ob1_proc.h"
#include "mca/pml/base/pml_base_recvreq.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * Receive request: the base PML receive request plus the byte counters
 * used by mca_pml_ob1_recv_request_progress() to detect completion.
 */
struct mca_pml_ob1_recv_request_t {
    mca_pml_base_recv_request_t req_recv; /**< base receive request */
    size_t req_bytes_received;            /**< bytes received so far */
    size_t req_bytes_delivered;           /**< bytes delivered to the application */
};
typedef struct mca_pml_ob1_recv_request_t mca_pml_ob1_recv_request_t;
OBJ_CLASS_DECLARATION(mca_pml_ob1_recv_request_t);
/**
 * Allocate a recv request from the module's free list.
 * Non-blocking: uses OMPI_FREE_LIST_GET (compare the send side, which waits).
 *
 * @param rc (OUT) OMPI_SUCCESS or error status on failure.
 * @return Receive request.
 */
#define MCA_PML_GEN2_RECV_REQUEST_ALLOC(recvreq, rc) \
do { \
    ompi_list_item_t* item; \
    rc = OMPI_SUCCESS; \
    OMPI_FREE_LIST_GET(&mca_pml_ob1.recv_requests, item, rc); \
    recvreq = (mca_pml_ob1_recv_request_t*)item; \
} while(0)
/**
 * Initialize a receive request with call parameters.
 *
 * @param request (IN)     Receive request.
 * @param addr (IN)        User buffer.
 * @param count (IN)       Number of elements of indicated datatype.
 * @param datatype (IN)    User defined datatype.
 * @param src (IN)         Source rank w/in the communicator.
 * @param tag (IN)         User defined tag.
 * @param comm (IN)        Communicator.
 * @param persistent (IN)  Is this a persistent request.
 */
#define MCA_PML_GEN2_RECV_REQUEST_INIT( \
    request, \
    addr, \
    count, \
    datatype, \
    src, \
    tag, \
    comm, \
    persistent) \
{ \
    MCA_PML_BASE_RECV_REQUEST_INIT( \
        &(request)->req_recv, \
        addr, \
        count, \
        datatype, \
        src, \
        tag, \
        comm, \
        persistent); \
}
/**
 * Return a recv request to the module's free list after finalizing the
 * base request.
 *
 * @param request (IN)  Receive request.
 */
#define MCA_PML_GEN2_RECV_REQUEST_RETURN(request) \
do { \
    MCA_PML_BASE_RECV_REQUEST_FINI(&request->req_recv); \
    OMPI_FREE_LIST_RETURN(&mca_pml_ob1.recv_requests, (ompi_list_item_t*)request); \
} while(0)
/**
 * Attempt to match the request against the unexpected fragment list
 * for all source ranks w/in the communicator.
 *
 * @param request (IN)  Request to match.
 */
void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request);

/**
 * Attempt to match the request against the unexpected fragment list
 * for a specific source rank.
 *
 * @param request (IN)  Request to match.
 */
void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request);
/**
 * Start an initialized request: reset the completion/byte counters and
 * hand the request to the wild or specific matching path.
 *
 * @param request  Receive request (expands in place; no return value).
 */
#define MCA_PML_GEN2_RECV_REQUEST_START(request) \
{ \
    /* init/re-init the request */ \
    (request)->req_bytes_received = 0; \
    (request)->req_bytes_delivered = 0; \
    (request)->req_recv.req_base.req_pml_complete = false; \
    (request)->req_recv.req_base.req_ompi.req_complete = false; \
    (request)->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
\
    /* always set the req_status.MPI_TAG to ANY_TAG before starting the \
     * request. This field is used if cancelled to find out if the request \
     * has been matched or not. \
     */ \
    (request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = OMPI_ANY_TAG; \
    (request)->req_recv.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \
    (request)->req_recv.req_base.req_ompi.req_status._cancelled = 0; \
\
    /* attempt to match posted recv */ \
    if((request)->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) { \
        mca_pml_ob1_recv_request_match_wild(request); \
    } else { \
        mca_pml_ob1_recv_request_match_specific(request); \
    } \
}
/**
 * Update the request to account for data received in the given segments
 * and complete it once all expected bytes have arrived (implemented in
 * pml_ob1_recvreq.c).
 */
void mca_pml_ob1_recv_request_progress(
    mca_pml_ob1_recv_request_t* req,
    mca_bmi_base_module_t* bmi,
    mca_bmi_base_segment_t* segments,
    size_t num_segments);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

285
src/mca/pml/ob1/pml_ob1_sendreq.c Обычный файл
Просмотреть файл

@ -0,0 +1,285 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
#include "ompi_config.h"
#include "include/constants.h"
#include "mca/pml/pml.h"
#include "mca/bmi/bmi.h"
#include "pml_ob1.h"
#include "pml_ob1_hdr.h"
#include "pml_ob1_proc.h"
#include "pml_ob1_sendreq.h"
#include "pml_ob1_recvreq.h"
#include "pml_ob1_endpoint.h"
/* ompi_request_t::req_fini hook - delegates to MCA_PML_GEN2_FINI. */
static int mca_pml_ob1_send_request_fini(struct ompi_request_t** request)
{
    MCA_PML_GEN2_FINI(request);
    return OMPI_SUCCESS;
}
/* ompi_request_t::req_free hook - delegates to MCA_PML_GEN2_FREE. */
static int mca_pml_ob1_send_request_free(struct ompi_request_t** request)
{
    MCA_PML_GEN2_FREE(request);
    return OMPI_SUCCESS;
}
/* ompi_request_t::req_cancel hook - cancellation of sends is not
 * supported, so this is a successful no-op. */
static int mca_pml_ob1_send_request_cancel(struct ompi_request_t* request, int complete)
{
    /* we don't cancel send requests for now */
    return OMPI_SUCCESS;
}
/* Constructor: tag the request as a send and install the
 * fini/free/cancel hooks defined above. */
static void mca_pml_ob1_send_request_construct(mca_pml_ob1_send_request_t* req)
{
    req->req_send.req_base.req_type = MCA_PML_REQUEST_SEND;
    req->req_send.req_base.req_ompi.req_fini = mca_pml_ob1_send_request_fini;
    req->req_send.req_base.req_ompi.req_free = mca_pml_ob1_send_request_free;
    req->req_send.req_base.req_ompi.req_cancel = mca_pml_ob1_send_request_cancel;
}
/* Destructor: nothing to release beyond what the base class handles. */
static void mca_pml_ob1_send_request_destruct(mca_pml_ob1_send_request_t* req)
{
}

/* Register the send-request class with its construct/destruct pair. */
OBJ_CLASS_INSTANCE(
    mca_pml_ob1_send_request_t,
    mca_pml_base_send_request_t,
    mca_pml_ob1_send_request_construct,
    mca_pml_ob1_send_request_destruct);
/**
*
*/
/* BMI completion callback for a send descriptor: completes the MPI
 * request once all bytes are queued, then recycles the descriptor. */
static void mca_pml_ob1_send_completion(
    mca_bmi_base_module_t* bmi,
    struct mca_bmi_base_endpoint_t* ep,
    struct mca_bmi_base_descriptor_t* descriptor,
    int status)
{
    mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
    mca_pml_ob1_endpoint_t* ob1_ep = sendreq->req_endpoint;

    OMPI_THREAD_LOCK(&ompi_request_lock);
    /* done only when every byte of the message has been queued */
    if (sendreq->req_offset == sendreq->req_send.req_bytes_packed) {
        sendreq->req_send.req_base.req_pml_complete = true;
        if (sendreq->req_send.req_base.req_ompi.req_complete == false) {
            /* fill in MPI status and wake any waiters */
            sendreq->req_send.req_base.req_ompi.req_status.MPI_SOURCE = sendreq->req_send.req_base.req_comm->c_my_rank;
            sendreq->req_send.req_base.req_ompi.req_status.MPI_TAG = sendreq->req_send.req_base.req_tag;
            sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
            sendreq->req_send.req_base.req_ompi.req_status._count = sendreq->req_send.req_bytes_packed;
            sendreq->req_send.req_base.req_ompi.req_complete = true;
            if(ompi_request_waiting) {
                ompi_condition_broadcast(&ompi_request_cond);
            }
        } else if(sendreq->req_send.req_base.req_free_called) {
            /* user already freed the request - release it now */
            MCA_PML_GEN2_FREE((ompi_request_t**)&sendreq);
        } else if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) {
            mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq);
        }
    }
    OMPI_THREAD_UNLOCK(&ompi_request_lock);

    /* check for pending requests that need to be progressed */
    /* NOTE(review): this loop removes each pending request and then drops it
     * (it also clobbers the local `sendreq`); it looks like a placeholder
     * until restart logic exists - confirm before relying on it */
    while(ompi_list_get_size(&mca_pml_ob1.send_pending) != 0) {
        OMPI_THREAD_LOCK(&mca_pml_ob1.ob1_lock);
        sendreq = (mca_pml_ob1_send_request_t*)ompi_list_remove_first(&mca_pml_ob1.send_pending);
        OMPI_THREAD_UNLOCK(&mca_pml_ob1.ob1_lock);
    }

    /* release BMI resources: cache one descriptor on the endpoint for
     * reuse, free any others */
    if(ob1_ep->bmi_cache == NULL) {
        ob1_ep->bmi_cache = descriptor;
    }  else {
        ob1_ep->bmi_free(bmi,descriptor);
    }
}
/**
* NTL can send directly from user allocated memory.
*/
/* Send directly from user-allocated memory (zero-copy path).  Not yet
 * implemented; reached only when the endpoint has no bmi_alloc (see
 * MCA_PML_GEN2_SEND_REQUEST_START). */
int mca_pml_ob1_send_user(
    mca_pml_ob1_send_request_t* sendreq,
    mca_pml_ob1_endpoint_t* endpoint)
{
    return OMPI_ERROR;
}
/**
* NTL requires "specially" allocated memory. Request a segment that
* is used for initial hdr and any eager data.
*/
/*
 * Copy-in send path: allocate (or reuse a cached) BMI descriptor, build
 * the match/rendezvous header in it, pack up to one eager fragment of
 * user data behind the header, and hand the descriptor to the BMI.
 * On failure the request is released and an error is returned.
 */
int mca_pml_ob1_send_copy(
    mca_pml_ob1_send_request_t* sendreq,
    mca_pml_ob1_endpoint_t* endpoint)
{
    mca_bmi_base_descriptor_t* descriptor;
    mca_bmi_base_segment_t* segment;
    mca_pml_ob1_hdr_t* hdr;
    size_t size = sendreq->req_send.req_bytes_packed;
    int rc;

    /* shortcut for zero byte */
    if(size == 0) {

        /* reuse the descriptor cached on the endpoint when available */
        descriptor = endpoint->bmi_cache;
        if(NULL != descriptor) {
            endpoint->bmi_cache = NULL;
        } else {
            descriptor = endpoint->bmi_alloc(endpoint->bmi, sizeof(mca_pml_ob1_hdr_t));
            if(NULL == descriptor) {
                OBJ_RELEASE(sendreq);
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            descriptor->des_cbfunc = mca_pml_ob1_send_completion;
        }
        segment = descriptor->des_src;

        /* build hdr */
        hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
        hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid;
        hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
        hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
        hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
        hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed;
        hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence;

        /* if an acknowledgment is not required - can get by w/ shorter hdr */
        if (sendreq->req_send.req_send_mode != MCA_PML_BASE_SEND_SYNCHRONOUS) {
            hdr->hdr_common.hdr_flags = 0;
            hdr->hdr_common.hdr_type = MCA_PML_GEN2_HDR_TYPE_MATCH;
            segment->seg_len = sizeof(mca_pml_ob1_match_hdr_t);
        } else {
            hdr->hdr_common.hdr_flags = MCA_PML_GEN2_HDR_FLAGS_ACK;
            hdr->hdr_common.hdr_type = MCA_PML_GEN2_HDR_TYPE_RNDV;
            hdr->hdr_rndv.hdr_frag_length = 0;
            hdr->hdr_rndv.hdr_src_ptr.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */
            hdr->hdr_rndv.hdr_src_ptr.pval = sendreq;
            segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t);
        }
        /* zero bytes - nothing left to queue */
        ompi_request_complete((ompi_request_t*)sendreq);

    } else {

        struct iovec iov;
        unsigned int iov_count;
        unsigned int max_data;

        /* determine first fragment size */
        if(size > endpoint->bmi_eager_limit - sizeof(mca_pml_ob1_hdr_t)) {
            size = endpoint->bmi_eager_limit - sizeof(mca_pml_ob1_hdr_t);
        }

        /* allocate space for hdr + first fragment */
        descriptor = endpoint->bmi_alloc(endpoint->bmi, size + sizeof(mca_pml_ob1_hdr_t));
        if(NULL == descriptor) {
            OBJ_RELEASE(sendreq);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        descriptor->des_cbfunc = mca_pml_ob1_send_completion;
        segment = descriptor->des_src;

        /* build hdr */
        hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
        hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid;
        hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
        hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
        hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
        hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed;
        hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence;

        /* if an acknowledgment is not required - can get by w/ shorter hdr */
        if (sendreq->req_send.req_send_mode != MCA_PML_BASE_SEND_SYNCHRONOUS) {
            /* bug fix: a non-synchronous eager send requires no ack, so the
             * flags must be 0 here (was MCA_PML_GEN2_HDR_FLAGS_ACK, which
             * contradicted the zero-byte path above) */
            hdr->hdr_common.hdr_flags = 0;
            hdr->hdr_common.hdr_type = MCA_PML_GEN2_HDR_TYPE_MATCH;

            /* pack the data into the supplied buffer */
            iov.iov_base = (unsigned char*)segment->seg_addr.pval + sizeof(mca_pml_ob1_match_hdr_t);
            iov.iov_len = size;
            iov_count = 1;
            max_data = size;
            if((rc = ompi_convertor_pack(
                &sendreq->req_send.req_convertor,
                &iov,
                &iov_count,
                &max_data,
                NULL)) < 0) {
                endpoint->bmi_free(endpoint->bmi, descriptor);
                OBJ_RELEASE(sendreq);
                return rc;
            }

            /* update length w/ number of bytes actually packed */
            segment->seg_len = sizeof(mca_pml_ob1_match_hdr_t) + max_data;

        /* rendezvous header is required */
        } else {
            hdr->hdr_common.hdr_flags = MCA_PML_GEN2_HDR_FLAGS_ACK;
            hdr->hdr_common.hdr_type = MCA_PML_GEN2_HDR_TYPE_RNDV;
            hdr->hdr_rndv.hdr_src_ptr.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */
            hdr->hdr_rndv.hdr_src_ptr.pval = sendreq;

            /* pack the data into the supplied buffer */
            iov.iov_base = (unsigned char*)segment->seg_addr.pval + sizeof(mca_pml_ob1_rendezvous_hdr_t);
            iov.iov_len = size;
            iov_count = 1;
            max_data = size;
            if((rc = ompi_convertor_pack(
                &sendreq->req_send.req_convertor,
                &iov,
                &iov_count,
                &max_data,
                NULL)) < 0) {
                endpoint->bmi_free(endpoint->bmi, descriptor);
                OBJ_RELEASE(sendreq);
                return rc;
            }
            hdr->hdr_rndv.hdr_frag_length = max_data;
            segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t) + max_data;
        }
        sendreq->req_offset = max_data;
        /* entire message fit in the eager fragment */
        if(sendreq->req_offset == sendreq->req_send.req_bytes_packed) {
            ompi_request_complete((ompi_request_t*)sendreq);
        }
    }
    descriptor->des_cbdata = sendreq;

    /* send */
    rc = endpoint->bmi_send(
        endpoint->bmi,
        endpoint->bmi_endpoint,
        descriptor,
        MCA_BMI_TAG_PML);
    if(OMPI_SUCCESS != rc) {
        endpoint->bmi_free(endpoint->bmi,descriptor);
        OBJ_RELEASE(sendreq);
    }
    return rc;
}

151
src/mca/pml/ob1/pml_ob1_sendreq.h Обычный файл
Просмотреть файл

@ -0,0 +1,151 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef OMPI_PML_GEN2_SEND_REQUEST_H
#define OMPI_PML_GEN2_SEND_REQUEST_H
#include "mca/bmi/bmi.h"
#include "mca/pml/base/pml_base_sendreq.h"
#include "pml_ob1_proc.h"
#include "pml_ob1_comm.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * Send request: the base PML send request plus the peer proc/endpoint
 * chosen at start time and the number of bytes queued so far.
 */
struct mca_pml_ob1_send_request_t {
    mca_pml_base_send_request_t req_send; /**< base send request */
    mca_pml_ob1_proc_t* req_proc;         /**< peer proc this send targets */
    mca_pml_ob1_endpoint_t* req_endpoint; /**< endpoint selected for the first fragment */
    size_t req_offset;                    /**< bytes of the message queued so far */
};
typedef struct mca_pml_ob1_send_request_t mca_pml_ob1_send_request_t;
OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
/**
 * Allocate a send request from the module free list (blocks via
 * OMPI_FREE_LIST_WAIT until one is available) and cache the peer proc
 * on it.  Fails when no PML proc is known for the destination rank.
 */
#define MCA_PML_GEN2_SEND_REQUEST_ALLOC( \
    comm, \
    dst, \
    sendreq, \
    rc) \
{ \
    mca_pml_ob1_proc_t *proc = comm->c_pml_procs[dst]; \
    ompi_list_item_t* item; \
\
    if(NULL == proc) { \
        rc = OMPI_ERR_OUT_OF_RESOURCE; \
    } else { \
        rc = OMPI_SUCCESS; \
        OMPI_FREE_LIST_WAIT(&mca_pml_ob1.send_requests, item, rc); \
        sendreq = (mca_pml_ob1_send_request_t*)item; \
        sendreq->req_proc = proc; \
    } \
}
/**
 * Initialize a send request with call parameters; forwards everything
 * to the base send-request initializer.
 */
#define MCA_PML_GEN2_SEND_REQUEST_INIT( \
    sendreq, \
    buf, \
    count, \
    datatype, \
    dst, \
    tag, \
    comm, \
    sendmode, \
    persistent) \
{ \
    MCA_PML_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \
        buf, \
        count, \
        datatype, \
        dst, \
        tag, \
        comm, \
        sendmode, \
        persistent); \
}
/**
 * Zero-copy path: the BMI doesn't require pre-pinned or "specially"
 * allocated memory, so we can try to send directly from the user's
 * buffer when it is contiguous.
 */
int mca_pml_ob1_send_user(
    mca_pml_ob1_send_request_t* sendreq,
    mca_pml_ob1_endpoint_t* endpoint);

/**
 * Copy-in path: the BMI requires "specially" allocated memory.  Request
 * a segment that is used for the initial hdr and any eager data.
 */
int mca_pml_ob1_send_copy(
    mca_pml_ob1_send_request_t* sendreq,
    mca_pml_ob1_endpoint_t* endpoint);
/**
 * Start a send request: pick the next "first fragment" endpoint, reset
 * the request state, assign a sequence number, then dispatch to the
 * copy-in path (endpoint supplies bmi_alloc) or the zero-copy path.
 */
#define MCA_PML_GEN2_SEND_REQUEST_START(sendreq, rc) \
{ \
    mca_pml_ob1_endpoint_t* endpoint; \
    mca_pml_ob1_proc_t* proc = sendreq->req_proc; \
\
    /* select next endpoint */ \
    endpoint = mca_pml_ob1_ep_array_get_next(&proc->bmi_first); \
    sendreq->req_offset = 0; \
    sendreq->req_send.req_base.req_ompi.req_complete = false; \
    sendreq->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
    sendreq->req_send.req_base.req_sequence = OMPI_THREAD_ADD32(&proc->proc_sequence,1); \
    sendreq->req_endpoint = endpoint; \
\
    /* handle buffered send */ \
    if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \
        mca_pml_base_bsend_request_start(&sendreq->req_send.req_base.req_ompi); \
    } \
\
    if(NULL != endpoint->bmi_alloc) { \
        rc = mca_pml_ob1_send_copy(sendreq, endpoint); \
    } else { \
        rc = mca_pml_ob1_send_user(sendreq, endpoint); \
    } \
}
/** Return a send request to the module free list. */
#define MCA_PML_GEN2_SEND_REQUEST_RETURN(sendreq) \
{ \
    /*  Let the base handle the reference counts */ \
    MCA_PML_BASE_SEND_REQUEST_FINI((&sendreq->req_send)); \
    OMPI_FREE_LIST_RETURN( \
        &mca_pml_ob1.send_requests, (ompi_list_item_t*)sendreq); \
}
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

121
src/mca/pml/ob1/pml_ob1_start.c Обычный файл
Просмотреть файл

@ -0,0 +1,121 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "pml_ob1.h"
#include "pml_ob1_recvreq.h"
#include "pml_ob1_sendreq.h"
/**
 * Start (or restart) an array of persistent requests (MPI_Startall path).
 *
 * @param count     number of entries in requests
 * @param requests  persistent requests to start; NULL entries are skipped
 * @return OMPI_SUCCESS, or an OMPI error code on failure
 *
 * A request that the PML has not yet finished with is flagged for
 * deferred free (req_free_called) and replaced in the caller's array
 * with a freshly initialized request before being started.
 */
int mca_pml_ob1_start(size_t count, ompi_request_t** requests)
{
int rc;
size_t i;
for(i=0; i<count; i++) {
mca_pml_base_request_t *pml_request = (mca_pml_base_request_t*)requests[i];
if(NULL == pml_request)
continue;
/* If the persistent request is currently active - obtain the
 * request lock and verify the status is incomplete. if the
 * pml layer has not completed the request - mark the request
 * as free called - so that it will be freed when the request
 * completes - and create a new request.
 */
switch(pml_request->req_ompi.req_state) {
case OMPI_REQUEST_INACTIVE:
/* inactive and PML-complete: safe to reuse as-is */
if(pml_request->req_pml_complete == true)
break;
/* otherwise fall through */
case OMPI_REQUEST_ACTIVE: {
ompi_request_t *request;
/* lock guards req_pml_complete / req_free_called transitions */
OMPI_THREAD_LOCK(&ompi_request_lock);
if (pml_request->req_pml_complete == false) {
/* free request after it completes */
pml_request->req_free_called = true;
} else {
/* can reuse the existing request */
OMPI_THREAD_UNLOCK(&ompi_request_lock);
break;
}
/* allocate a new request to replace the still-pending one */
switch(pml_request->req_type) {
case MCA_PML_REQUEST_SEND: {
/* preserve the original send mode (standard/buffered/...) */
mca_pml_base_send_mode_t sendmode =
((mca_pml_base_send_request_t*)pml_request)->req_send_mode;
rc = mca_pml_ob1_isend_init(
pml_request->req_addr,
pml_request->req_count,
pml_request->req_datatype,
pml_request->req_peer,
pml_request->req_tag,
sendmode,
pml_request->req_comm,
&request);
break;
}
case MCA_PML_REQUEST_RECV:
rc = mca_pml_ob1_irecv_init(
pml_request->req_addr,
pml_request->req_count,
pml_request->req_datatype,
pml_request->req_peer,
pml_request->req_tag,
pml_request->req_comm,
&request);
break;
default:
rc = OMPI_ERR_REQUEST;
break;
}
OMPI_THREAD_UNLOCK(&ompi_request_lock);
if(OMPI_SUCCESS != rc)
return rc;
/* hand the replacement back to the caller's array */
pml_request = (mca_pml_base_request_t*)request;
requests[i] = request;
break;
}
default:
return OMPI_ERR_REQUEST;
}
/* start the request */
switch(pml_request->req_type) {
case MCA_PML_REQUEST_SEND:
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)pml_request;
MCA_PML_GEN2_SEND_REQUEST_START(sendreq, rc);
if(rc != OMPI_SUCCESS)
return rc;
break;
}
case MCA_PML_REQUEST_RECV:
{
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)pml_request;
MCA_PML_GEN2_RECV_REQUEST_START(recvreq);
break;
}
default:
return OMPI_ERR_REQUEST;
}
}
return OMPI_SUCCESS;
}

Просмотреть файл

@ -39,10 +39,6 @@
* returning the highest priority and closes/unloads any other PML
* components that may have been opened.
*
* After the PML is selected, the MCA framework loads and initializes
* all available PTLs. The PML is notified of the selected PTLs via
* the mca_pml_base_add_ptls_fn_t downcall from the MCA.
*
* After all of the MCA components are initialized, the MPI/RTE will
* make downcalls into the PML to provide the initial list of
* processes (ompi_proc_t instances), and notification of changes
@ -160,32 +156,14 @@ typedef int (*mca_pml_base_module_add_procs_fn_t)(struct ompi_proc_t **procs, si
*/
typedef int (*mca_pml_base_module_del_procs_fn_t)(struct ompi_proc_t **procs, size_t nprocs);
/**
* Downcall from MCA layer after all PTLs have been loaded/selected.
*
* @param ptls List of selected PTLs
* @return OMPI_SUCCESS or failure status.
*
* Provides a notification to the PML that processes have
* gone away, and provides the PML the opportunity to cleanup
* any data cached on the ompi_proc_t data structure.
*/
typedef int (*mca_pml_base_module_add_ptls_fn_t)(ompi_list_t *ptls);
/**
* Downcall from MCA layer to enable the PML/PTLs.
*
* @param param parameter to change
* @param value optional value
* @param size size of value
* @param enable Enable/Disable PML forwarding
* @return OMPI_SUCCESS or failure status.
*/
typedef int (*mca_pml_base_module_control_fn_t)(
int param,
void *value,
size_t size
typedef int (*mca_pml_base_module_enable_fn_t)(
bool enable
);
@ -468,8 +446,7 @@ struct mca_pml_base_module_1_0_0_t {
/* downcalls from MCA to PML */
mca_pml_base_module_add_procs_fn_t pml_add_procs;
mca_pml_base_module_del_procs_fn_t pml_del_procs;
mca_pml_base_module_add_ptls_fn_t pml_add_ptls;
mca_pml_base_module_control_fn_t pml_control;
mca_pml_base_module_enable_fn_t pml_enable;
mca_pml_base_module_progress_fn_t pml_progress;
/* downcalls from MPI to PML */

Просмотреть файл

@ -40,8 +40,7 @@ mca_pml_teg_t mca_pml_teg = {
{
mca_pml_teg_add_procs,
mca_pml_teg_del_procs,
mca_pml_teg_add_ptls,
mca_pml_teg_control,
mca_pml_teg_enable,
mca_pml_teg_progress,
mca_pml_teg_add_comm,
mca_pml_teg_del_comm,
@ -91,11 +90,11 @@ static int ptl_exclusivity_compare(const void* arg1, const void* arg2)
}
int mca_pml_teg_add_ptls(ompi_list_t *ptls)
int mca_pml_teg_add_ptls(void)
{
/* build an array of ptls and ptl modules */
mca_ptl_base_selected_module_t* selected_ptl;
size_t num_ptls = ompi_list_get_size(ptls);
size_t num_ptls = ompi_list_get_size(&mca_ptl_base_modules_initialized);
size_t cache_bytes = 0;
mca_pml_teg.teg_num_ptl_modules = 0;
mca_pml_teg.teg_num_ptl_progress = 0;
@ -109,9 +108,11 @@ int mca_pml_teg_add_ptls(ompi_list_t *ptls)
return OMPI_ERR_OUT_OF_RESOURCE;
}
for(selected_ptl = (mca_ptl_base_selected_module_t*)ompi_list_get_first(ptls);
selected_ptl != (mca_ptl_base_selected_module_t*)ompi_list_get_end(ptls);
selected_ptl = (mca_ptl_base_selected_module_t*)ompi_list_get_next(selected_ptl)) {
for(selected_ptl = (mca_ptl_base_selected_module_t*)
ompi_list_get_first(&mca_ptl_base_modules_initialized);
selected_ptl != (mca_ptl_base_selected_module_t*)
ompi_list_get_end(&mca_ptl_base_modules_initialized);
selected_ptl = (mca_ptl_base_selected_module_t*)ompi_list_get_next(selected_ptl)) {
mca_ptl_base_module_t *ptl = selected_ptl->pbsm_module;
size_t i;
@ -163,12 +164,13 @@ int mca_pml_teg_add_ptls(ompi_list_t *ptls)
* Pass control information through to all PTL modules.
*/
int mca_pml_teg_control(int param, void* value, size_t size)
int mca_pml_teg_enable(bool enable)
{
size_t i=0;
int value = enable;
for(i=0; i<mca_pml_teg.teg_num_ptl_components; i++) {
if(NULL != mca_pml_teg.teg_ptl_components[i]->ptlm_control) {
int rc = mca_pml_teg.teg_ptl_components[i]->ptlm_control(param,value,size);
int rc = mca_pml_teg.teg_ptl_components[i]->ptlm_control(MCA_PTL_ENABLE,&value,sizeof(value));
if(rc != OMPI_SUCCESS)
return rc;
}

Просмотреть файл

@ -113,14 +113,10 @@ extern int mca_pml_teg_del_procs(
size_t nprocs
);
extern int mca_pml_teg_add_ptls(
ompi_list_t *ptls
);
extern int mca_pml_teg_add_ptls(void);
extern int mca_pml_teg_control(
int param,
void *size,
size_t value
extern int mca_pml_teg_enable(
bool enable
);
extern int mca_pml_teg_progress(void);
@ -239,7 +235,6 @@ extern int mca_pml_teg_start(
pml_request->req_free_called = true; \
if( pml_request->req_pml_complete == true) \
{ \
OMPI_REQUEST_FINI(*(request)); \
switch(pml_request->req_type) { \
case MCA_PML_REQUEST_SEND: \
{ \

Просмотреть файл

@ -22,6 +22,7 @@
#include "mca/ptl/ptl.h"
#include "mca/base/mca_base_param.h"
#include "mca/pml/base/pml_base_bsend.h"
#include "mca/ptl/base/base.h"
#include "pml_teg.h"
#include "pml_teg_proc.h"
#include "pml_teg_sendreq.h"
@ -102,14 +103,19 @@ int mca_pml_teg_component_open(void)
mca_pml_teg.teg_poll_iterations =
mca_pml_teg_param_register_int("poll_iterations", 100000);
mca_pml_teg.teg_priority =
mca_pml_teg_param_register_int("priority", 0);
mca_pml_teg_param_register_int("priority", 1);
return OMPI_SUCCESS;
/* attempt to open all ptls */
return mca_ptl_base_open();
}
int mca_pml_teg_component_close(void)
{
int rc;
if(OMPI_SUCCESS != (rc = mca_ptl_base_close()))
return rc;
#ifdef WIN32
WSACleanup();
#endif
@ -172,6 +178,11 @@ mca_pml_base_module_t* mca_pml_teg_component_init(int* priority,
if(rc != OMPI_SUCCESS)
return NULL;
rc = mca_ptl_base_select(enable_progress_threads,enable_mpi_threads);
if(rc != OMPI_SUCCESS)
return NULL;
mca_pml_teg_add_ptls();
return &mca_pml_teg.super;
}

Просмотреть файл

@ -44,7 +44,7 @@ int mca_pml_teg_iprobe(int src,
ompi_progress();
}
}
MCA_PML_BASE_RECV_REQUEST_RETURN((&recvreq.req_recv));
MCA_PML_BASE_RECV_REQUEST_FINI((&recvreq.req_recv));
return rc;
}
@ -63,7 +63,7 @@ int mca_pml_teg_probe(int src,
MCA_PML_TEG_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char, src, tag, comm, true);
if ((rc = mca_pml_teg_recv_request_start(&recvreq)) != OMPI_SUCCESS) {
MCA_PML_BASE_RECV_REQUEST_RETURN((&recvreq.req_recv));
MCA_PML_BASE_RECV_REQUEST_FINI((&recvreq.req_recv));
return rc;
}
@ -87,7 +87,7 @@ int mca_pml_teg_probe(int src,
if (NULL != status) {
*status = recvreq.req_recv.req_base.req_ompi.req_status;
}
MCA_PML_BASE_RECV_REQUEST_RETURN(&recvreq.req_recv);
MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq.req_recv);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -79,7 +79,7 @@ OBJ_CLASS_DECLARATION(mca_pml_teg_recv_request_t);
*/
#define MCA_PML_TEG_RECV_REQUEST_RETURN(request) \
do { \
MCA_PML_BASE_RECV_REQUEST_RETURN( &request->req_recv ); \
MCA_PML_BASE_RECV_REQUEST_FINI( &request->req_recv ); \
OMPI_FREE_LIST_RETURN(&mca_pml_teg.teg_recv_requests, (ompi_list_item_t*)request); \
} while(0)

Просмотреть файл

@ -128,7 +128,7 @@ OBJ_CLASS_DECLARATION(mca_pml_teg_send_request_t);
mca_pml_base_ptl_t* ptl_base = ptl->ptl_base; \
\
/* Let the base handle the reference counts */ \
MCA_PML_BASE_SEND_REQUEST_RETURN((&sendreq->req_send)); \
MCA_PML_BASE_SEND_REQUEST_FINI((&sendreq->req_send)); \
\
/* \
* If there is a cache associated with the ptl - first attempt \

Просмотреть файл

@ -20,6 +20,7 @@
#include "mpi.h"
#include "mca/pml/pml.h"
#include "mca/ptl/ptl.h"
#include "mca/ptl/base/base.h"
#include "mca/base/mca_base_param.h"
#include "mca/pml/base/pml_base_bsend.h"
#include "pml_uniq.h"
@ -102,12 +103,17 @@ int mca_pml_uniq_component_open(void)
mca_pml_uniq.uniq_poll_iterations =
mca_pml_uniq_param_register_int("poll_iterations", 100000);
return OMPI_SUCCESS;
/* attempt to open ptls */
return mca_ptl_base_open();
}
int mca_pml_uniq_component_close(void)
{
int rc;
if(OMPI_SUCCESS != (rc = mca_ptl_base_close()))
return rc;
#ifdef WIN32
WSACleanup();
#endif
@ -170,6 +176,11 @@ mca_pml_base_module_t* mca_pml_uniq_component_init(int* priority,
if(rc != OMPI_SUCCESS)
return NULL;
rc = mca_ptl_base_select(enable_progress_threads,enable_mpi_threads);
if(rc != OMPI_SUCCESS)
return NULL;
mca_pml_teg_add_ptls();
return &mca_pml_uniq.super;
}

Просмотреть файл

@ -143,11 +143,6 @@ int mca_ptl_base_select(bool enable_progress_threads,
orte_abort(1, "No ptl components available. This shouldn't happen.");
}
/* Once we have some modules, tell the PML about them */
MCA_PML_CALL(add_ptls(&mca_ptl_base_modules_initialized));
/* All done */
return OMPI_SUCCESS;
}

Просмотреть файл

@ -18,8 +18,21 @@
include $(top_ompi_srcdir)/config/Makefile.options
sources =
include src/Makefile.extra
sources = \
ptl_tcp.c \
ptl_tcp.h \
ptl_tcp_addr.h \
ptl_tcp_component.c \
ptl_tcp_peer.c \
ptl_tcp_peer.h \
ptl_tcp_proc.c \
ptl_tcp_proc.h \
ptl_tcp_recvfrag.c \
ptl_tcp_recvfrag.h \
ptl_tcp_sendfrag.c \
ptl_tcp_sendfrag.h \
ptl_tcp_sendreq.c \
ptl_tcp_sendreq.h
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
@ -45,3 +58,4 @@ mca_ptl_tcp_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(lib)
libmca_ptl_tcp_la_SOURCES = $(lib_sources)
libmca_ptl_tcp_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -17,6 +17,6 @@
# Specific to this module
PARAM_INIT_FILE=src/ptl_tcp.c
PARAM_CONFIG_HEADER_FILE="src/tcp_config.h"
PARAM_INIT_FILE=ptl_tcp.c
PARAM_CONFIG_HEADER_FILE="tcp_config.h"
PARAM_CONFIG_FILES="Makefile"

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -40,8 +40,6 @@
#include "mca/base/mca_base_module_exchange.h"
#include "mca/pml/pml.h"
#include "mca/pml/base/base.h"
#include "mca/ptl/ptl.h"
#include "mca/ptl/base/base.h"
#include "mca/coll/coll.h"
#include "mca/coll/base/base.h"
#include "mca/topo/topo.h"
@ -119,9 +117,6 @@ int ompi_mpi_finalize(void)
/* Now that all MPI objects dealing with communications are gone,
shut down MCA types having to do with communications */
if (OMPI_SUCCESS != (ret = mca_ptl_base_close())) {
return ret;
}
if (OMPI_SUCCESS != (ret = mca_pml_base_close())) {
return ret;
}

Просмотреть файл

@ -44,8 +44,6 @@
#include "mca/mpool/base/base.h"
#include "mca/mpool/mpool.h"
#include "mca/pml/pml.h"
#include "mca/ptl/ptl.h"
#include "mca/ptl/base/base.h"
#include "mca/pml/pml.h"
#include "mca/pml/base/base.h"
#include "mca/coll/coll.h"
@ -81,7 +79,7 @@ ompi_thread_t *ompi_mpi_main_thread = NULL;
int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
{
int ret, param;
int ret;
ompi_proc_t** procs;
size_t nprocs;
char *error = NULL;
@ -169,10 +167,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
error = "mca_pml_base_open() failed";
goto error;
}
if (OMPI_SUCCESS != (ret = mca_ptl_base_open())) {
error = "mca_ptl_base_open() failed";
goto error;
}
if (OMPI_SUCCESS != (ret = mca_coll_base_open())) {
error = "mca_coll_base_open() failed";
goto error;
@ -207,13 +201,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
goto error;
}
if (OMPI_SUCCESS !=
(ret = mca_ptl_base_select(OMPI_ENABLE_PROGRESS_THREADS,
OMPI_ENABLE_MPI_THREADS))) {
error = "mca_ptl_base_select() failed";
goto error;
}
if (OMPI_SUCCESS !=
(ret = mca_coll_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
OMPI_ENABLE_MPI_THREADS))) {
@ -346,10 +333,11 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
}
free(procs);
MCA_PML_CALL(add_comm(&ompi_mpi_comm_world));
MCA_PML_CALL(add_comm(&ompi_mpi_comm_self));
/* start PTL's */
param = 1;
if (OMPI_SUCCESS !=
(ret = mca_pml.pml_control(MCA_PTL_ENABLE, &param, sizeof(param)))) {
if (OMPI_SUCCESS != (ret = mca_pml.pml_enable(true))) {
error = "PML control failed";
goto error;
}

Просмотреть файл

@ -401,7 +401,7 @@ ompi_progress(void)
ompi_atomic_unlock(&progress_lock);
#endif /* OMPI_HAVE_THREAD_SUPPORT */
if (call_yield && events <= 0) {
if (events <= 0) {
/* If there is nothing to do - yield the processor - otherwise
* we could consume the processor for the entire time slice. If
* the processor is oversubscribed - this will result in a best-case

Просмотреть файл

@ -82,7 +82,7 @@ ompi_cmd_line_init_t orte_cmd_line_opts[] = {
"Show the orted version" },
{ "orte", "debug", NULL, 'd', NULL, "debug", 0,
NULL, OMPI_CMD_LINE_TYPE_BOOL,
&orted_globals.debug, OMPI_CMD_LINE_TYPE_BOOL,
"Debug the OpenRTE" },
{ NULL, NULL, NULL, '\0', NULL, "no-daemonize", 0,
@ -156,6 +156,8 @@ int main(int argc, char *argv[])
char log_file[PATH_MAX];
char *jobidstring;
fprintf(stderr, "orted\n");
/* setup to check common command line options that just report and die */
memset(&orted_globals, 0, sizeof(orted_globals_t));
cmd_line = OBJ_NEW(ompi_cmd_line_t);
@ -207,7 +209,9 @@ int main(int argc, char *argv[])
/* detach from controlling terminal
* otherwise, remain attached so output can get to us
*/
if(orted_globals.debug_daemons == false && orted_globals.no_daemonize == false) {
if(orted_globals.debug == false &&
orted_globals.debug_daemons == false &&
orted_globals.no_daemonize == false) {
orte_daemon_init(NULL);
}

Просмотреть файл

@ -50,6 +50,7 @@ typedef struct {
bool help;
bool version;
bool no_daemonize;
bool debug;
bool debug_daemons;
bool debug_daemons_file;
char* name;