1
1

- Use the GM BTL as a template instead, per Tim's suggestion

- Begin adding uDAPL-specific stuff
- Added config/ompi_check_udapl.m4 - hopefully I did this right

This commit was SVN r8681.
Этот коммит содержится в:
Andrew Friedley 2006-01-12 04:05:02 +00:00
родитель f5efb70252
Коммит c0bad339af
11 изменённых файлов: 628 добавлений и 188 удалений

Просмотреть файл

@ -63,6 +63,7 @@ m4_include(config/ompi_check_mx.m4)
m4_include(config/ompi_check_bproc.m4)
m4_include(config/ompi_check_mvapi.m4)
m4_include(config/ompi_check_openib.m4)
m4_include(config/ompi_check_udapl.m4)
m4_include(config/ompi_check_package.m4)
m4_include(config/ompi_check_slurm.m4)
m4_include(config/ompi_check_tm.m4)

68
config/ompi_check_udapl.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,68 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# OMPI_CHECK_UDAPL(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
# Check whether uDAPL support can be found.  Sets prefix_{CPPFLAGS,
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
# support, otherwise executes action-if-not-found.
#
# Command line options registered here:
#   --with-udapl(=DIR)       request uDAPL and optionally name its
#                            installation directory
#   --with-udapl-libdir=DIR  additional directory to search for the
#                            uDAPL libraries
#
# The probe itself is delegated to OMPI_CHECK_PACKAGE, which is asked
# for the header dat/udat.h, the library dapl, the function dat_ia_open
# and the extra link flag -ldat.
#
# Outcomes:
#   * --with-udapl=no (--without-udapl): nothing is probed and
#     action-if-not-found runs without an error.
#   * --with-udapl was given with any other value and the probe fails:
#     configure aborts via AC_MSG_ERROR.
#   * option not given and the probe fails: action-if-not-found runs.
#
# A warning is printed when HAVE_POSIX_THREADS is not "1".
AC_DEFUN([OMPI_CHECK_UDAPL],[
AC_ARG_WITH([udapl],
[AC_HELP_STRING([--with-udapl(=DIR)],
[Build uDAPL support, searching for libraries in DIR])])
AC_ARG_WITH([udapl-libdir],
[AC_HELP_STRING([--with-udapl-libdir=DIR],
[Search for uDAPL libraries in DIR/lib and DIR/lib64
in addition to other search paths])])
AS_IF([test ! -z "$with_udapl" -a "$with_udapl" != "yes"],
[ompi_check_udapl_dir="$with_udapl"])
AS_IF([test ! -z "$with_udapl_libdir" -a "$with_udapl_libdir" != "yes"],
[ompi_check_udapl_libdir="$with_udapl_libdir"])
AS_IF([test "$with_udapl" != "no"],
[ # check for pthreads and emit a warning that
# things might go south...
AS_IF([test "$HAVE_POSIX_THREADS" != "1"],
[AC_MSG_WARN([POSIX threads not enabled. May not be able to link with udapl])])
# remember the compiler flags in effect before the package check
ompi_check_udapl$1_save_CFLAGS="$CFLAGS"
ompi_check_udapl$1_save_CPPFLAGS="$CPPFLAGS"
OMPI_CHECK_PACKAGE([$1],
[dat/udat.h],
[dapl],
[dat_ia_open],
[-ldat],
[$ompi_check_udapl_dir],
[$ompi_check_udapl_libdir],
[ompi_check_udapl_happy="yes"],
[ompi_check_udapl_happy="no"])
# NOTE review: only CPPFLAGS is put back - the saved CFLAGS copy is not read again in this macro
CPPFLAGS="$ompi_check_udapl$1_save_CPPFLAGS"],
[ompi_check_udapl_happy="no"])
AS_IF([test "$ompi_check_udapl_happy" = "yes"],
[$2],
[AS_IF([test ! -z "$with_udapl" -a "$with_udapl" != "no"],
[AC_MSG_ERROR([uDAPL support requested but not found. Aborting])])
$3])
])

Просмотреть файл

@ -20,7 +20,9 @@
sources = \
AM_CPPFLAGS = $(btl_udapl_CPPFLAGS)
udapl_sources = \
btl_udapl.c \
btl_udapl.h \
btl_udapl_component.c \
@ -29,34 +31,32 @@ sources = \
btl_udapl_frag.c \
btl_udapl_frag.h \
btl_udapl_proc.c \
btl_udapl_proc.h \
btl_udapl_error.h
btl_udapl_proc.h
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_btl_udapl_DSO
lib =
lib_sources =
component = mca_btl_udapl.la
component_sources = $(sources)
component_noinst =
component_install = mca_btl_udapl.la
else
lib = libmca_btl_udapl.la
lib_sources = $(sources)
component =
component_sources =
component_noinst = libmca_btl_udapl.la
component_install =
endif
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component)
mca_btl_udapl_la_SOURCES = $(component_sources)
mca_btl_udapl_la_LDFLAGS = -module -avoid-version
mcacomponent_LTLIBRARIES = $(component_install)
mca_btl_udapl_la_SOURCES = $(udapl_sources)
mca_btl_udapl_la_LIBADD = \
$(btl_udapl_LIBS) \
$(top_ompi_builddir)/ompi/libmpi.la \
$(top_ompi_builddir)/orte/liborte.la \
$(top_ompi_builddir)/opal/libopal.la
mca_btl_udapl_la_LDFLAGS = -module -avoid-version $(btl_udapl_LDFLAGS)
noinst_LTLIBRARIES = $(lib)
libmca_btl_udapl_la_SOURCES = $(lib_sources)
libmca_btl_udapl_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_btl_udapl_la_SOURCES = $(udapl_sources)
libmca_btl_udapl_la_LIBADD = $(btl_udapl_LIBS)
libmca_btl_udapl_la_LDFLAGS = -module -avoid-version $(btl_udapl_LDFLAGS)

Просмотреть файл

@ -28,9 +28,10 @@
#include "btl_udapl_proc.h"
#include "btl_udapl_endpoint.h"
#include "datatype/convertor.h"
#include "datatype/datatype.h"
#include "mca/mpool/base/base.h"
#include "mca/mpool/mpool.h"
#include "ompi/proc/proc.h"
mca_btl_udapl_module_t mca_btl_udapl_module = {
{
@ -58,6 +59,7 @@ mca_btl_udapl_module_t mca_btl_udapl_module = {
}
};
/**
*
*/
@ -78,19 +80,22 @@ int mca_btl_udapl_add_procs(
mca_btl_udapl_proc_t* udapl_proc;
mca_btl_base_endpoint_t* udapl_endpoint;
if(ompi_proc == ompi_proc_local())
continue;
if(NULL == (udapl_proc = mca_btl_udapl_proc_create(ompi_proc))) {
return OMPI_ERR_OUT_OF_RESOURCE;
continue;
}
/*
* Check to make sure that the peer has at least as many interface
* addresses exported as we are trying to use. If not, then
* don't bind this BTL instance to the proc.
* don't bind this PTL instance to the proc.
*/
OPAL_THREAD_LOCK(&udapl_proc->proc_lock);
/* The btl_proc datastructure is shared by all uDAPL BTL
/* The btl_proc datastructure is shared by all uDAPL PTL
* instances that are trying to reach this destination.
* Cache the peer instance on the btl_proc.
*/
@ -107,12 +112,10 @@ int mca_btl_udapl_add_procs(
OPAL_THREAD_UNLOCK(&udapl_proc->proc_lock);
continue;
}
ompi_bitmap_set_bit(reachable, i);
OPAL_THREAD_UNLOCK(&udapl_proc->proc_lock);
peers[i] = udapl_endpoint;
}
return OMPI_SUCCESS;
}
@ -158,7 +161,7 @@ mca_btl_base_descriptor_t* mca_btl_udapl_alloc(
mca_btl_udapl_frag_t* frag;
int rc;
if(size <= btl->btl_eager_limit){
if(size <= btl->btl_eager_limit) {
MCA_BTL_UDAPL_FRAG_ALLOC_EAGER(udapl_btl, frag, rc);
frag->segment.seg_len =
size <= btl->btl_eager_limit ?
@ -170,8 +173,12 @@ mca_btl_base_descriptor_t* mca_btl_udapl_alloc(
size : btl->btl_max_send_size ;
}
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
return (mca_btl_base_descriptor_t*)frag;
return &frag->base;
}
@ -185,15 +192,14 @@ int mca_btl_udapl_free(
{
mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*)des;
if(frag->size == 0) {
#if MCA_BTL_HAS_MPOOL
OBJ_RELEASE(frag->registration);
#endif
btl->btl_mpool->mpool_release(btl->btl_mpool, frag->registration);
MCA_BTL_UDAPL_FRAG_RETURN_USER(btl, frag);
} else if(frag->size == btl->btl_eager_limit){
MCA_BTL_UDAPL_FRAG_RETURN_EAGER(btl, frag);
} else if(frag->size == btl->btl_max_send_size) {
} else if(frag->size == mca_btl_udapl_component.udapl_eager_frag_size) {
MCA_BTL_UDAPL_FRAG_RETURN_EAGER(btl, frag);
} else if(frag->size == mca_btl_udapl_component.udapl_max_frag_size) {
MCA_BTL_UDAPL_FRAG_RETURN_MAX(btl, frag);
} else {
opal_output(0, "[%s:%d] mca_btl_udapl_free: invalid descriptor\n", __FILE__,__LINE__);
return OMPI_ERR_BAD_PARAM;
}
return OMPI_SUCCESS;
@ -221,13 +227,77 @@ mca_btl_base_descriptor_t* mca_btl_udapl_prepare_src(
size_t max_data = *size;
int32_t free_after;
int rc;
/*
* If the data has already been pinned and is contiguous then we can
* use it in place.
*/
if (NULL != registration && 0 == ompi_convertor_need_buffers(convertor)) {
size_t reg_len;
MCA_BTL_UDAPL_FRAG_ALLOC_USER(btl, frag, rc);
if(NULL == frag){
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data, &free_after);
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
reg_len = (unsigned char*)registration->bound - (unsigned char*)iov.iov_base + 1;
/* bump reference count as so that the registration
* doesn't go away when the operation completes
*/
btl->btl_mpool->mpool_retain(btl->btl_mpool,
(mca_mpool_base_registration_t*) registration);
frag->registration = registration;
/*
* if the data is not already pinned - but the leave pinned option is set,
* then go ahead and pin contiguous data. however, if a reserve is required
* then we must allocate a fragment w/ buffer space
*/
} else if (max_data > btl->btl_max_send_size &&
ompi_convertor_need_buffers(convertor) == 0 &&
reserve == 0) {
mca_mpool_base_module_t* mpool = btl->btl_mpool;
MCA_BTL_UDAPL_FRAG_ALLOC_USER(btl, frag, rc);
if(NULL == frag){
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data, &free_after);
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
rc = mpool->mpool_register(
mpool,
iov.iov_base,
max_data,
0,
&registration);
if(rc != OMPI_SUCCESS) {
MCA_BTL_UDAPL_FRAG_RETURN_USER(btl,frag);
return NULL;
}
frag->registration = registration;
}
/*
* if we aren't pinning the data and the requested size is less
* than the eager limit pack into a fragment from the eager pool
*/
if (max_data+reserve <= btl->btl_eager_limit) {
else if (max_data+reserve <= btl->btl_eager_limit) {
MCA_BTL_UDAPL_FRAG_ALLOC_EAGER(btl, frag, rc);
if(NULL == frag) {
@ -256,8 +326,8 @@ mca_btl_base_descriptor_t* mca_btl_udapl_prepare_src(
if(NULL == frag) {
return NULL;
}
if(max_data + reserve > frag->size){
max_data = frag->size - reserve;
if(max_data + reserve > btl->btl_max_send_size){
max_data = btl->btl_max_send_size - reserve;
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
@ -304,6 +374,8 @@ mca_btl_base_descriptor_t* mca_btl_udapl_prepare_dst(
size_t* size)
{
mca_btl_udapl_frag_t* frag;
mca_mpool_base_module_t* mpool = btl->btl_mpool;
long lb;
int rc;
MCA_BTL_UDAPL_FRAG_ALLOC_USER(btl, frag, rc);
@ -311,14 +383,40 @@ mca_btl_base_descriptor_t* mca_btl_udapl_prepare_dst(
return NULL;
}
ompi_ddt_type_lb(convertor->pDesc, &lb);
frag->segment.seg_len = *size;
frag->segment.seg_addr.pval = convertor->pBaseBuf + convertor->bConverted;
frag->segment.seg_addr.pval = convertor->pBaseBuf + lb + convertor->bConverted;
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
frag->base.des_dst = &frag->segment;
frag->base.des_dst_cnt = 1;
frag->base.des_flags = 0;
if(NULL != registration) {
/* bump reference count as so that the registration
* doesn't go away when the operation completes
*/
mpool->mpool_retain(mpool,
(mca_mpool_base_registration_t*) registration);
frag->registration = registration;
} else {
rc = mpool->mpool_register(
mpool,
frag->segment.seg_addr.pval,
frag->segment.seg_len,
0,
&registration);
if(rc != OMPI_SUCCESS) {
MCA_BTL_UDAPL_FRAG_RETURN_USER(btl,frag);
return NULL;
}
frag->registration = registration;
}
return &frag->base;
}
@ -335,18 +433,15 @@ mca_btl_base_descriptor_t* mca_btl_udapl_prepare_dst(
int mca_btl_udapl_send(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_descriptor_t* descriptor,
struct mca_btl_base_descriptor_t* des,
mca_btl_base_tag_t tag)
{
/* mca_btl_udapl_module_t* udapl_btl = (mca_btl_udapl_module_t*) btl; */
mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*)descriptor;
frag->endpoint = endpoint;
/* TODO */
return OMPI_ERR_NOT_IMPLEMENTED;
return OMPI_ERR_NOT_IMPLEMENTED;
}
/**
* Initiate an asynchronous put.
*
@ -358,16 +453,13 @@ int mca_btl_udapl_send(
int mca_btl_udapl_put(
mca_btl_base_module_t* btl,
mca_btl_base_endpoint_t* endpoint,
mca_btl_base_descriptor_t* descriptor)
mca_btl_base_descriptor_t* des)
{
/* mca_btl_udapl_module_t* udapl_btl = (mca_btl_udapl_module_t*) btl; */
mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*) descriptor;
frag->endpoint = endpoint;
/* TODO */
return OMPI_ERR_NOT_IMPLEMENTED;
}
/**
* Initiate an asynchronous get.
*
@ -380,13 +472,9 @@ int mca_btl_udapl_put(
int mca_btl_udapl_get(
mca_btl_base_module_t* btl,
mca_btl_base_endpoint_t* endpoint,
mca_btl_base_descriptor_t* descriptor)
mca_btl_base_descriptor_t* des)
{
/* mca_btl_udapl_module_t* udapl_btl = (mca_btl_udapl_module_t*) btl; */
mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*) descriptor;
frag->endpoint = endpoint;
/* TODO */
return OMPI_ERR_NOT_IMPLEMENTED;
return OMPI_ERR_NOT_IMPLEMENTED;
}
@ -397,25 +485,6 @@ int mca_btl_udapl_get(
int mca_btl_udapl_finalize(struct mca_btl_base_module_t* btl)
{
mca_btl_udapl_module_t* udapl_btl = (mca_btl_udapl_module_t*) btl;
if(udapl_btl->udapl_frag_eager.fl_num_allocated !=
udapl_btl->udapl_frag_eager.super.opal_list_length){
opal_output(0, "btl udapl_frag_eager: %d allocated %d returned \n",
udapl_btl->udapl_frag_eager.fl_num_allocated,
udapl_btl->udapl_frag_eager.super.opal_list_length);
}
if(udapl_btl->udapl_frag_max.fl_num_allocated !=
udapl_btl->udapl_frag_max.super.opal_list_length) {
opal_output(0, "btl udapl_frag_max: %d allocated %d returned \n",
udapl_btl->udapl_frag_max.fl_num_allocated,
udapl_btl->udapl_frag_max.super.opal_list_length);
}
if(udapl_btl->udapl_frag_user.fl_num_allocated !=
udapl_btl->udapl_frag_user.super.opal_list_length){
opal_output(0, "btl udapl_frag_user: %d allocated %d returned \n",
udapl_btl->udapl_frag_user.fl_num_allocated,
udapl_btl->udapl_frag_user.super.opal_list_length);
}
OBJ_DESTRUCT(&udapl_btl->udapl_lock);
OBJ_DESTRUCT(&udapl_btl->udapl_frag_eager);

Просмотреть файл

@ -25,6 +25,7 @@
/* Standard system includes */
#include <sys/types.h>
#include <string.h>
#include <dat/udat.h>
/* Open MPI includes */
#include "class/ompi_free_list.h"
@ -36,46 +37,42 @@
#include "opal/util/output.h"
#include "mca/mpool/mpool.h"
#include "mca/btl/btl.h"
#include "btl_udapl_endpoint.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
#define MCA_BTL_HAS_MPOOL 1
/*
#define GM_BUFFER_SIZE 7
#define GM_BUFFER_LENGTH gm_max_length_for_size(GM_BUFFER_SIZE)
*/
/**
* uDAPL BTL component.
*/
struct mca_btl_udapl_component_t {
mca_btl_base_component_1_0_0_t super; /**< base BTL component */
mca_btl_base_component_1_0_0_t super; /**< base BTL component */
uint32_t udapl_num_btls;
/**< number of hcas available to the uDAPL component */
size_t udapl_num_btls; /**< number of hcas available to the uDAPL component */
size_t udapl_max_btls; /**< maximum number of supported hcas */
struct mca_btl_udapl_module_t **udapl_btls; /**< array of available BTL modules */
size_t udapl_num_mru;
size_t udapl_eager_frag_size;
size_t udapl_max_frag_size;
char* udapl_port_name;
int32_t udapl_num_repost;
int32_t udapl_num_high_priority; /**< number of receive descriptors at high priority */
int udapl_debug; /**< turn on debug output */
struct mca_btl_udapl_module_t *udapl_btls;
/**< array of available BTL modules */
int udapl_free_list_num;
/**< initial size of free lists */
int udapl_free_list_max;
/**< maximum size of free lists */
int udapl_free_list_inc;
/**< number of elements to alloc when growing free lists */
opal_list_t udapl_procs;
/**< list of udapl proc structures */
opal_mutex_t udapl_lock;
/**< lock for accessing module state */
char* udapl_mpool_name;
/**< name of memory pool */
bool leave_pinned;
/**< pin memory on first use and leave pinned */
int udapl_free_list_num; /**< initial size of free lists */
int udapl_free_list_max; /**< maximum size of free lists */
int udapl_free_list_inc; /**< number of elements to alloc when growing free lists */
opal_list_t udapl_procs; /**< list of udapl proc structures */
opal_mutex_t udapl_lock; /**< lock for accessing module state */
char* udapl_mpool_name; /**< name of memory pool */
bool leave_pinned; /**< pin memory on first use and leave pinned */
};
typedef struct mca_btl_udapl_component_t mca_btl_udapl_component_t;
@ -88,19 +85,31 @@ extern mca_btl_udapl_component_t mca_btl_udapl_component;
*/
struct mca_btl_udapl_module_t {
mca_btl_base_module_t super; /**< base BTL interface */
mca_btl_base_recv_reg_t udapl_reg[MCA_BTL_TAG_MAX];
mca_btl_base_recv_reg_t udapl_reg[256];
/* local port handle/address */
/* struct gm_port *port; */
mca_btl_udapl_addr_t udapl_addr;
/* free list of fragment descriptors */
ompi_free_list_t udapl_frag_eager;
ompi_free_list_t udapl_frag_max;
ompi_free_list_t udapl_frag_user;
/* lock for accessing module state */
opal_mutex_t udapl_lock;
#if MCA_BTL_HAS_MPOOL
struct mca_mpool_base_module_t* udapl_mpool;
/* number of send/recv tokens */
#if 0
int32_t udapl_num_send_tokens;
int32_t udapl_max_send_tokens;
int32_t udapl_num_recv_tokens;
int32_t udapl_max_recv_tokens;
int32_t udapl_num_repost;
#endif
/* lock for accessing module state */
opal_list_t udapl_pending; /**< list of pending send descriptors */
opal_list_t udapl_repost; /**< list of pending fragments */
opal_list_t udapl_mru_reg; /**< list of most recently used registrations */
opal_mutex_t udapl_lock;
};
typedef struct mca_btl_udapl_module_t mca_btl_udapl_module_t;
extern mca_btl_udapl_module_t mca_btl_udapl_module;
@ -307,6 +316,51 @@ extern mca_btl_base_descriptor_t* mca_btl_udapl_prepare_dst(
size_t* size);
/**
 * Acquire a send token - queue the fragment if none is available.
 *
 * Atomically decrements (btl)->udapl_num_send_tokens. If the result is
 * negative there was no token to take: the fragment is appended to
 * (btl)->udapl_pending under (btl)->udapl_lock, the decrement is undone,
 * and the macro executes "return OMPI_SUCCESS" IN THE CALLING FUNCTION.
 * It may therefore only be used inside a function returning int.
 *
 * @param btl   pointer to the uDAPL module (mca_btl_udapl_module_t*);
 *              evaluated several times, so pass a side-effect free
 *              expression
 * @param frag  fragment to queue when no token is available
 */
#define MCA_BTL_UDAPL_ACQUIRE_TOKEN(btl, frag)                                   \
do {                                                                             \
    /* queue the descriptor if there are no send tokens */                       \
    if(OPAL_THREAD_ADD32(&(btl)->udapl_num_send_tokens, -1) < 0) {               \
        OPAL_THREAD_LOCK(&(btl)->udapl_lock);                                    \
        opal_list_append(&(btl)->udapl_pending, (opal_list_item_t*)(frag));      \
        OPAL_THREAD_UNLOCK(&(btl)->udapl_lock);                                  \
        /* give back the token that was never really acquired */                 \
        OPAL_THREAD_ADD32(&(btl)->udapl_num_send_tokens, 1);                     \
        return OMPI_SUCCESS;                                                     \
    }                                                                            \
} while (0)
/**
* Return a send token and dequeue any pending fragment.
*
* Increments btl->udapl_num_send_tokens. If btl->udapl_pending is not
* empty, the first fragment is removed under btl->udapl_lock and
* re-issued through mca_btl_udapl_send / _put / _get according to
* frag->type. The list size is tested before the lock is taken, which
* is why the removed fragment is checked against NULL again.
*
* The return values of the re-issued calls are not examined, and the
* switch has no default case, so a fragment with any other type value
* is dropped from the list without being re-issued.
*
* btl is expanded several times without parentheses; pass a plain
* pointer variable of type mca_btl_udapl_module_t*.
*/
#define MCA_BTL_UDAPL_RETURN_TOKEN(btl) \
do { \
OPAL_THREAD_ADD32( &btl->udapl_num_send_tokens, 1 ); \
if(opal_list_get_size(&btl->udapl_pending)) { \
mca_btl_udapl_frag_t* frag; \
OPAL_THREAD_LOCK(&btl->udapl_lock); \
frag = (mca_btl_udapl_frag_t*)opal_list_remove_first(&btl->udapl_pending); \
OPAL_THREAD_UNLOCK(&btl->udapl_lock); \
if(NULL != frag) { \
switch(frag->type) { \
case MCA_BTL_UDAPL_SEND: \
mca_btl_udapl_send(&btl->super, frag->endpoint, &frag->base, frag->hdr->tag); \
break; \
case MCA_BTL_UDAPL_PUT: \
mca_btl_udapl_put(&btl->super, frag->endpoint, &frag->base); \
break; \
case MCA_BTL_UDAPL_GET: \
mca_btl_udapl_get(&btl->super, frag->endpoint, &frag->base); \
break; \
} \
} \
} \
} while (0)
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -23,18 +23,21 @@
#include "opal/util/if.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "mca/pml/pml.h"
#include "mca/btl/btl.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"
#include "mca/base/mca_base_param.h"
#include "mca/pml/base/pml_base_module_exchange.h"
#include "opal/mca/base/mca_base_param.h"
#include "mca/errmgr/errmgr.h"
#include "mca/mpool/base/base.h"
#include "btl_udapl.h"
#include "btl_udapl_frag.h"
#include "btl_udapl_endpoint.h"
#include "mca/btl/base/base.h"
#include "datatype/convertor.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/datatype/convertor.h"
#include "btl_udapl_endpoint.h"
#include "orte/util/proc_info.h"
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
mca_btl_udapl_component_t mca_btl_udapl_component = {
{
@ -78,7 +81,7 @@ static inline char* mca_btl_udapl_param_register_string(
const char* default_value)
{
char *param_value;
int id = mca_base_param_register_string("btl","ib",param_name,NULL,default_value);
int id = mca_base_param_register_string("btl","udapl",param_name,NULL,default_value);
mca_base_param_lookup_string(id, &param_value);
return param_value;
}
@ -87,7 +90,7 @@ static inline int mca_btl_udapl_param_register_int(
const char* param_name,
int default_value)
{
int id = mca_base_param_register_int("btl","ib",param_name,NULL,default_value);
int id = mca_base_param_register_int("btl","udapl",param_name,NULL,default_value);
int param_value = default_value;
mca_base_param_lookup_int(id,&param_value);
return param_value;
@ -99,42 +102,82 @@ static inline int mca_btl_udapl_param_register_int(
*/
int mca_btl_udapl_component_open(void)
{
{
int param, value;
/* initialize state */
mca_btl_udapl_component.udapl_num_btls=0;
mca_btl_udapl_component.udapl_btls=NULL;
/* initialize objects */
OBJ_CONSTRUCT(&mca_btl_udapl_component.udapl_procs, opal_list_t);
OBJ_CONSTRUCT(&mca_btl_udapl_component.udapl_lock, opal_mutex_t);
/* register uDAPL component parameters */
mca_btl_udapl_component.udapl_free_list_num =
mca_btl_udapl_param_register_int ("free_list_num", 8);
mca_btl_udapl_component.udapl_free_list_max =
mca_btl_udapl_param_register_int ("free_list_max", 1024);
mca_btl_udapl_param_register_int ("free_list_max", -1);
mca_btl_udapl_component.udapl_free_list_inc =
mca_btl_udapl_param_register_int ("free_list_inc", 32);
mca_btl_udapl_param_register_int ("free_list_inc", 8);
mca_btl_udapl_component.udapl_debug =
mca_btl_udapl_param_register_int("debug", 0);
mca_btl_udapl_component.udapl_mpool_name =
mca_btl_udapl_param_register_string("mpool", "ib");
mca_btl_udapl_param_register_string("mpool", "udapl");
mca_btl_udapl_component.udapl_max_btls =
mca_btl_udapl_param_register_int("max_modules", 4);
mca_btl_udapl_component.udapl_num_high_priority =
mca_btl_udapl_param_register_int("num_high_priority", 8);
mca_btl_udapl_component.udapl_num_repost =
mca_btl_udapl_param_register_int("num_repost", 4);
mca_btl_udapl_component.udapl_num_mru =
mca_btl_udapl_param_register_int("num_mru", 64);
mca_btl_udapl_component.udapl_port_name=
mca_btl_udapl_param_register_string("port_name", "OMPI");
/* register uDAPL module parameters */
mca_btl_udapl_module.super.btl_exclusivity =
mca_btl_udapl_param_register_int ("exclusivity", 0);
mca_btl_udapl_param_register_int ("exclusivity", MCA_BTL_EXCLUSIVITY_DEFAULT - 10);
mca_btl_udapl_module.super.btl_eager_limit =
mca_btl_udapl_param_register_int ("first_frag_size", 64*1024) - sizeof(mca_btl_base_header_t);
mca_btl_udapl_param_register_int ("eager_limit", 32*1024);
mca_btl_udapl_module.super.btl_min_send_size =
mca_btl_udapl_param_register_int ("min_send_size", 64*1024) - sizeof(mca_btl_base_header_t);
mca_btl_udapl_param_register_int ("min_send_size", 32*1024);
mca_btl_udapl_module.super.btl_max_send_size =
mca_btl_udapl_param_register_int ("max_send_size", 128*1024) - sizeof(mca_btl_base_header_t);
mca_btl_udapl_param_register_int ("max_send_size", 64*1024);
mca_btl_udapl_module.super.btl_min_rdma_size =
mca_btl_udapl_param_register_int("min_rdma_size", 1024*1024);
mca_btl_udapl_param_register_int("min_rdma_size", 512*1024);
mca_btl_udapl_module.super.btl_max_rdma_size =
mca_btl_udapl_param_register_int("max_rdma_size", 1024*1024);
mca_btl_udapl_module.super.btl_flags =
mca_btl_udapl_param_register_int("flags", MCA_BTL_FLAGS_PUT);
mca_btl_udapl_param_register_int("max_rdma_size", 128*1024);
mca_btl_udapl_module.super.btl_bandwidth =
mca_btl_udapl_param_register_int("bandwidth", 225);
/* compute the eager frag size */
#if 0
mca_btl_udapl_component.udapl_eager_frag_size =
udapl_min_size_for_length(mca_btl_udapl_module.super.btl_eager_limit) - 1;
mca_btl_udapl_module.super.btl_eager_limit =
udapl_max_length_for_size(mca_btl_udapl_component.udapl_eager_frag_size) -
sizeof(mca_btl_base_header_t);
#endif
/* compute the max frag size */
#if 0
mca_btl_udapl_component.udapl_max_frag_size =
udapl_min_size_for_length(mca_btl_udapl_module.super.btl_max_send_size) - 1;
mca_btl_udapl_module.super.btl_max_send_size =
udapl_max_length_for_size(mca_btl_udapl_component.udapl_max_frag_size) -
sizeof(mca_btl_base_header_t);
#endif
/* leave pinned option */
value = 0;
param = mca_base_param_find("mpi", NULL, "leave_pinned");
mca_base_param_lookup_int(param, &value);
mca_btl_udapl_component.leave_pinned = value;
return OMPI_SUCCESS;
}
/*
* component cleanup - sanity checking of queue lengths
*/
@ -144,28 +187,143 @@ int mca_btl_udapl_component_close(void)
return OMPI_SUCCESS;
}
/*
* uDAPL component initialization:
* (1) read interface list from kernel and compare against component parameters
* then create a BTL instance for selected interfaces
* (2) setup uDAPL listen socket for incoming connection attempts
* (3) register BTL parameters with the MCA
/**
* Initialize module instance
*/
mca_btl_base_module_t** mca_btl_udapl_component_init(int *num_btl_modules,
bool enable_progress_threads,
bool enable_mpi_threads)
#if 0
static int
mca_btl_udapl_module_init (mca_btl_udapl_module_t * btl)
{
return NULL;
/*mca_mpool_base_resources_t resources;*/
/*int32_t num_high_priority;
int32_t i;
int rc;*/
/* initialize objects */
OBJ_CONSTRUCT(&btl->udapl_frag_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->udapl_frag_max, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->udapl_frag_user, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->udapl_pending, opal_list_t);
OBJ_CONSTRUCT(&btl->udapl_repost, opal_list_t);
OBJ_CONSTRUCT(&btl->udapl_mru_reg, opal_list_t);
OBJ_CONSTRUCT(&btl->udapl_lock, opal_mutex_t);
/* query nic tokens */
/* initialize memory pool */
/* initialize free lists */
/* post receive buffers */
/* enable rdma */
return OMPI_SUCCESS;
}
#endif
/*
* uDAPL component progress.
* Register uDAPL component addressing information. The MCA framework
* will make this available to all peers.
*/
static int
mca_btl_udapl_modex_send(void)
{
int rc;
size_t i;
size_t size;
mca_btl_udapl_addr_t *addrs = NULL;
size = mca_btl_udapl_component.udapl_num_btls * sizeof (mca_btl_udapl_addr_t);
if (0 != size) {
addrs = (mca_btl_udapl_addr_t *)malloc (size);
if (NULL == addrs) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
for (i = 0; i < mca_btl_udapl_component.udapl_num_btls; i++) {
mca_btl_udapl_module_t *btl = mca_btl_udapl_component.udapl_btls[i];
addrs[i] = btl->udapl_addr;
}
}
rc = mca_pml_base_modex_send (&mca_btl_udapl_component.super.btl_version, addrs, size);
if (NULL != addrs) {
free (addrs);
}
return rc;
}
/*
* Initialize the uDAPL component,
* check how many interfaces are available and create a btl module for each.
*/
mca_btl_base_module_t **
mca_btl_udapl_component_init (int *num_btl_modules,
bool enable_progress_threads,
bool enable_mpi_threads)
{
DAT_PROVIDER_INFO* datinfo;
mca_btl_base_module_t **btls;
*num_btl_modules = 0;
/* enumerate uDAPL interfaces */
datinfo = malloc(mca_btl_udapl_component.udapl_max_btls * sizeof(DAT_PROVIDER_INFO));
if(DAT_SUCCESS != dat_registry_list_providers(mca_btl_udapl_component.udapl_max_btls,
(DAT_COUNT*)&mca_btl_udapl_component.udapl_num_btls, &datinfo)) {
free(datinfo);
return NULL;
}
/* create a BTL module for each interface */
/* finished with datinfo */
free(datinfo);
/* publish uDAPL parameters with the MCA framework */
if (OMPI_SUCCESS != mca_btl_udapl_modex_send()) {
return NULL;
}
/* return array of BTLs */
btls = (mca_btl_base_module_t**) malloc (
mca_btl_udapl_component.udapl_num_btls * sizeof(mca_btl_base_module_t *));
if (NULL == btls) {
return NULL;
}
memcpy(btls, mca_btl_udapl_component.udapl_btls,
mca_btl_udapl_component.udapl_num_btls * sizeof(mca_btl_udapl_module_t *));
*num_btl_modules = mca_btl_udapl_component.udapl_num_btls;
return btls;
}
/*
 * uDAPL component progress.
 *
 * Guards against re-entry with a static counter: if another call is
 * already inside this function, the counter is restored and OMPI_SUCCESS
 * is returned immediately. Otherwise every BTL module is visited once and
 * the number of completed events is returned. No per-module work is
 * implemented yet, so the returned count is currently always 0.
 */
int mca_btl_udapl_component_progress()
{
    static int32_t inprogress = 0;
    int count = 0;
    size_t i;

    /* could get into deadlock in this case as we post recvs after callback completes */
    if(OPAL_THREAD_ADD32(&inprogress, 1) > 1) {
        OPAL_THREAD_ADD32(&inprogress, -1);
        return OMPI_SUCCESS;
    }

    /* the index must advance, otherwise the loop never ends once at
     * least one BTL exists */
    for( i = 0; i < mca_btl_udapl_component.udapl_num_btls; i++ ) {
        /* TODO: poll this module for completions and add them to count */
    }

    OPAL_THREAD_ADD32(&inprogress, -1);
    return count;
}

Просмотреть файл

@ -29,7 +29,23 @@
extern "C" {
#endif
OBJ_CLASS_DECLARATION(mca_btl_udapl_endpoint_t);
/**
* Structure used to publish uDAPL id information to peers.
*
* NOTE(review): the field set looks carried over from the GM BTL this
* component was templated on (see the disabled GM_API_VERSION section);
* confirm which identifiers uDAPL actually needs before relying on
* node_id / port_id.
*/
struct mca_btl_udapl_addr_t {
/* disabled: GM-specific global id, kept only as a reference */
#if 0
#if GM_API_VERSION > 0x200
unsigned int global_id;
#else
char global_id[GM_MAX_HOST_NAME_LEN];
#endif /* GM_API_VERSION > 0x200 */
#endif
unsigned int node_id; /* presumably identifies the node - TODO confirm for uDAPL */
unsigned int port_id; /* presumably identifies the port on that node - TODO confirm for uDAPL */
};
typedef struct mca_btl_udapl_addr_t mca_btl_udapl_addr_t;
/**
* An abstraction that represents a connection to a endpoint process.
@ -46,11 +62,14 @@ struct mca_btl_base_endpoint_t {
struct mca_btl_udapl_proc_t* endpoint_proc;
/**< proc structure corresponding to endpoint */
mca_btl_udapl_addr_t endpoint_addr;
};
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
typedef mca_btl_base_endpoint_t mca_btl_udapl_endpoint_t;
OBJ_CLASS_DECLARATION(mca_btl_udapl_endpoint_t);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -4,7 +4,6 @@
static void mca_btl_udapl_frag_common_constructor(mca_btl_udapl_frag_t* frag)
{
mca_btl_udapl_frag_common_constructor(frag);
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
frag->base.des_dst = NULL;
@ -13,20 +12,27 @@ static void mca_btl_udapl_frag_common_constructor(mca_btl_udapl_frag_t* frag)
static void mca_btl_udapl_frag_eager_constructor(mca_btl_udapl_frag_t* frag)
{
frag->hdr = (mca_btl_base_header_t*)(frag + 1);
frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1);
frag->segment.seg_len = mca_btl_udapl_module.super.btl_eager_limit - sizeof(mca_btl_base_header_t);
frag->registration = NULL;
frag->size = mca_btl_udapl_module.super.btl_eager_limit;
frag->size = mca_btl_udapl_component.udapl_eager_frag_size;
mca_btl_udapl_frag_common_constructor(frag);
}
static void mca_btl_udapl_frag_max_constructor(mca_btl_udapl_frag_t* frag)
{
frag->hdr = (mca_btl_base_header_t*)(frag + 1);
frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1);
frag->segment.seg_len = mca_btl_udapl_module.super.btl_max_send_size - sizeof(mca_btl_base_header_t);
frag->registration = NULL;
frag->size = mca_btl_udapl_module.super.btl_max_send_size;
frag->size = mca_btl_udapl_component.udapl_max_frag_size;
mca_btl_udapl_frag_common_constructor(frag);
}
/**
* Constructor for fragments that describe user memory.
*
* Such a fragment gets no header pointer and a size of 0, then receives
* the shared descriptor initialisation from
* mca_btl_udapl_frag_common_constructor().
*/
static void mca_btl_udapl_frag_user_constructor(mca_btl_udapl_frag_t* frag)
{
frag->hdr = NULL; /* no header pointer for this kind of fragment */
frag->size = 0; /* zero size, unlike the eager and max constructors */
mca_btl_udapl_frag_common_constructor(frag);
}

Просмотреть файл

@ -30,19 +30,26 @@ extern "C" {
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_t);
/**
* Kind of operation a fragment carries. Stored in the type field of
* mca_btl_udapl_frag_t.
*/
typedef enum {
MCA_BTL_UDAPL_SEND, /* send operation */
MCA_BTL_UDAPL_PUT, /* put operation */
MCA_BTL_UDAPL_GET /* get operation */
} mca_btl_udapl_frag_type_t;
/**
* uDAPL send fragment derived type.
* UDAPL send fragment derived type.
*/
struct mca_btl_udapl_frag_t {
mca_btl_base_descriptor_t base;
mca_btl_base_segment_t segment;
struct mca_btl_udapl_module_t* btl;
struct mca_btl_base_endpoint_t *endpoint;
struct mca_mpool_base_registration_t* registration;
mca_btl_base_header_t *hdr;
size_t size;
#if MCA_BTL_HAS_MPOOL
struct mca_mpool_base_registration_t* registration;
#endif
/*enum gm_priority priority;*/
mca_btl_udapl_frag_type_t type;
};
typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_t);
@ -108,6 +115,22 @@ OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_user_t);
(opal_list_item_t*)(frag)); \
}
/**
* Hand a receive fragment back for posting, batching the work.
*
* While btl->udapl_repost holds fewer than btl->udapl_num_repost
* entries, the fragment is only appended to that list (under
* btl->udapl_lock). Once the threshold is reached, the fragment and
* then every fragment removed from the repost list are passed to
* udapl_provide_receive_buffer() while the lock is held.
*
* The macro assigns to frag, so the argument must be a modifiable
* pointer variable; it is NULL after the flush branch has run. The
* list size is read before the lock is taken.
*
* NOTE(review): udapl_provide_receive_buffer, btl->port and
* frag->priority are not declared anywhere in the sources visible
* here - they look like mechanical renames from the GM template.
* Confirm the uDAPL equivalents before using this macro.
*/
#define MCA_BTL_UDAPL_FRAG_POST(btl,frag) \
do { \
if(opal_list_get_size(&btl->udapl_repost) < (size_t)btl->udapl_num_repost) { \
OPAL_THREAD_LOCK(&btl->udapl_lock); \
opal_list_append(&btl->udapl_repost, (opal_list_item_t*)frag); \
OPAL_THREAD_UNLOCK(&btl->udapl_lock); \
} else { \
OPAL_THREAD_LOCK(&btl->udapl_lock); \
do { \
udapl_provide_receive_buffer(btl->port, frag->hdr, frag->size, frag->priority); \
} while (NULL != (frag = (mca_btl_udapl_frag_t*)opal_list_remove_first(&btl->udapl_repost))); \
OPAL_THREAD_UNLOCK(&btl->udapl_lock); \
} \
} while(0)
#if defined(c_plusplus) || defined(__cplusplus)

Просмотреть файл

@ -45,7 +45,7 @@ void mca_btl_udapl_proc_construct(mca_btl_udapl_proc_t* proc)
}
/*
* Cleanup uDAPL proc instance
* Cleanup ib proc instance
*/
void mca_btl_udapl_proc_destruct(mca_btl_udapl_proc_t* proc)
@ -100,49 +100,55 @@ static mca_btl_udapl_proc_t* mca_btl_udapl_proc_lookup_ompi(ompi_proc_t* ompi_pr
/*
 * Return the uDAPL proc structure for ompi_proc, creating it on first use.
 *
 * NOTE(review): this span shows two variants of the body interleaved (one
 * using module_proc, one using udapl_proc), apparently the before/after
 * lines of the change with their markers lost -- confirm which lines are
 * current before relying on the notes below.
 *
 * udapl_proc variant: a proc already found by
 * mca_btl_udapl_proc_lookup_ompi() is returned as is. Otherwise a new
 * object is created, the peer's address array is fetched with
 * mca_pml_base_modex_recv(), and proc_endpoints is allocated with one slot
 * per received address. NULL is returned (after releasing the new object)
 * when the modex receive fails, when the received size is not a multiple
 * of sizeof(mca_btl_udapl_addr_t), or when proc_endpoints is NULL -- which
 * covers the zero-address case as well as a failed malloc.
 */
mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
{
mca_btl_udapl_proc_t* module_proc = NULL;
mca_btl_udapl_proc_t* udapl_proc = NULL;
size_t size;
int rc;
/* Check if we have already created a uDAPL proc
* structure for this ompi process */
module_proc = mca_btl_udapl_proc_lookup_ompi(ompi_proc);
if(module_proc != NULL) {
/* Gotcha! */
return module_proc;
udapl_proc = mca_btl_udapl_proc_lookup_ompi(ompi_proc);
if(udapl_proc != NULL) {
return udapl_proc;
}
/* Oops! First time, gotta create a new uDAPL proc
* out of the ompi_proc ... */
/* create a new udapl proc out of the ompi_proc ... */
udapl_proc = OBJ_NEW(mca_btl_udapl_proc_t);
udapl_proc->proc_endpoint_count = 0;
udapl_proc->proc_ompi = ompi_proc;
udapl_proc->proc_guid = ompi_proc->proc_name;
module_proc = OBJ_NEW(mca_btl_udapl_proc_t);
/* Initialize number of peer */
module_proc->proc_endpoint_count = 0;
module_proc->proc_ompi = ompi_proc;
/* build a unique identifier (of arbitrary
* size) to represent the proc */
module_proc->proc_guid = ompi_proc->proc_name;
/* uDAPL module doesn't have addresses exported at
* initialization, so the addr_count is set to one. */
module_proc->proc_addr_count = 1;
/* XXX: Right now, there can be only 1 peer associated
* with a proc. Needs a little bit change in
* mca_btl_udapl_proc_t to allow on demand increasing of
* number of endpoints for this proc */
module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
malloc(module_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*));
if(NULL == module_proc->proc_endpoints) {
OBJ_RELEASE(module_proc);
/* query for the peer address info */
rc = mca_pml_base_modex_recv(
&mca_btl_udapl_component.super.btl_version,
ompi_proc,
(void*)&udapl_proc->proc_addrs,
&size);
if(OMPI_SUCCESS != rc) {
opal_output(0, "[%s:%d] mca_pml_base_modex_recv failed for peer [%d,%d,%d]",
__FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name));
OBJ_RELEASE(udapl_proc);
return NULL;
}
return module_proc;
/* the received payload must hold a whole number of mca_btl_udapl_addr_t entries */
if((size % sizeof(mca_btl_udapl_addr_t)) != 0) {
opal_output(0, "[%s:%d] invalid udapl address for peer [%d,%d,%d]",
__FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name));
OBJ_RELEASE(udapl_proc);
return NULL;
}
udapl_proc->proc_addr_count = size/sizeof(mca_btl_udapl_addr_t);
/* with no addresses proc_endpoints stays NULL, so the check further down
 * releases the proc and returns NULL */
if (0 == udapl_proc->proc_addr_count) {
udapl_proc->proc_endpoints = NULL;
} else {
/* one endpoint slot per address received from the peer */
udapl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
malloc(udapl_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*));
}
if(NULL == udapl_proc->proc_endpoints) {
OBJ_RELEASE(udapl_proc);
return NULL;
}
return udapl_proc;
}
@ -151,12 +157,45 @@ mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
* already held. Insert a btl instance into the proc array and assign
* it an address.
*/
/*
 * NOTE(review): two signatures and two insertion sequences appear below
 * (module_* and udapl_* names), apparently the old and new lines of the
 * change shown together -- confirm which ones are current.
 *
 * udapl_* variant: returns OMPI_ERR_OUT_OF_RESOURCE when
 * proc_addr_count <= proc_endpoint_count, i.e. every address slot already
 * has an endpoint. Otherwise it links the endpoint to the proc, copies the
 * next unused entry of proc_addrs into the endpoint, stores the endpoint in
 * proc_endpoints, bumps proc_endpoint_count and returns OMPI_SUCCESS.
 */
int mca_btl_udapl_proc_insert(mca_btl_udapl_proc_t* module_proc,
mca_btl_base_endpoint_t* module_endpoint)
int mca_btl_udapl_proc_insert(
mca_btl_udapl_proc_t* udapl_proc,
mca_btl_base_endpoint_t* udapl_endpoint)
{
/* insert into endpoint array */
module_endpoint->endpoint_proc = module_proc;
module_proc->proc_endpoints[module_proc->proc_endpoint_count++] = module_endpoint;
/*mca_btl_udapl_module_t* udapl_btl = udapl_endpoint->endpoint_btl;*/
/* insert into endpoint array */
if(udapl_proc->proc_addr_count <= udapl_proc->proc_endpoint_count)
return OMPI_ERR_OUT_OF_RESOURCE;
udapl_endpoint->endpoint_proc = udapl_proc;
/* the endpoint takes the address at the index it is about to occupy */
udapl_endpoint->endpoint_addr = udapl_proc->proc_addrs[udapl_proc->proc_endpoint_count];
/* Compiled out: global-id to node-id translation that still refers to GM_*
 * symbols and to udapl_btl, whose declaration above is commented out. */
#if 0
#if GM_API_VERSION > 0x200
if (GM_SUCCESS != udapl_global_id_to_node_id(
udapl_btl->port,
udapl_endpoint->endpoint_addr.global_id,
&udapl_endpoint->endpoint_addr.node_id)) {
opal_output( 0, "[%s:%d] error in converting global to local id \n",
__FILE__, __LINE__ );
return OMPI_ERROR;
}
if(mca_btl_udapl_component.udapl_debug > 0) {
opal_output(0, "[%d,%d,%d] mapped global id %lu to node id %lu\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
udapl_endpoint->endpoint_addr.global_id,
udapl_endpoint->endpoint_addr.node_id);
}
#else
udapl_endpoint->udapl_addr.node_id = udapl_host_name_to_node_id( udapl_btl->udapl_port,
udapl_endpoint->udapl_addr.global_id);
if( GM_NO_SUCH_NODE_ID == udapl_endpoint->udapl_addr.node_id ) {
ompi_output( 0, "[%s:%d] unable to convert the remote host name (%s) to a host id",
__FILE__, __LINE__, udapl_endpoint->udapl_addr.global_id);
return OMPI_ERROR;
}
#endif /* GM_API_VERSION > 0x200 */
#endif
udapl_proc->proc_endpoints[udapl_proc->proc_endpoint_count] = udapl_endpoint;
udapl_proc->proc_endpoint_count++;
return OMPI_SUCCESS;
}

Просмотреть файл

@ -46,8 +46,11 @@ struct mca_btl_udapl_proc_t {
orte_process_name_t proc_guid;
/**< globally unique identifier for the process */
struct mca_btl_udapl_addr_t* proc_addrs;
/**< array of addresses exported by peer */
size_t proc_addr_count;
/**< number of addresses published by endpoint */
/**< number of addresses published by peer */
struct mca_btl_base_endpoint_t **proc_endpoints;
/**< array of endpoints that have been created to access this proc */