1
1

The self module. There are still some improvements to make in order to decrease the overhead. We could add a shortcut from the MPI_Sendrecv function to avoid creating requests when possible.

This commit was SVN r1221.
Этот коммит содержится в:
George Bosilca 2004-06-10 19:16:08 +00:00
родитель 630c2f0c8b
Коммит ab0460b1fa
8 изменённых файлов: 498 добавлений и 0 удалений

36
src/mca/ptl/self/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,36 @@
#
# $HEADER$
#
# Use the top-level OpenMPI Makefile.options
include $(top_ompi_srcdir)/config/Makefile.options
SUBDIRS = src
EXTRA_DIST = VERSION
# According to the MCA spec, we have to make the output library here
# in the top-level directory, and it has to be named
# mca_ptl_self.la (or libmca_ptl_self.la when not built as a loadable module)
if OMPI_BUILD_ptl_self_LOADABLE_MODULE
module_noinst =
module_install = mca_ptl_self.la
else
module_noinst = libmca_ptl_self.la
module_install =
endif
mcamoduledir = $(libdir)/ompi
mcamodule_LTLIBRARIES = $(module_install)
mca_ptl_self_la_SOURCES =
mca_ptl_self_la_LIBADD = \
src/libmca_ptl_self.la \
$(LIBOMPI_LA)
mca_ptl_self_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(module_noinst)
libmca_ptl_self_la_SOURCES =
libmca_ptl_self_la_LIBADD = src/libmca_ptl_self.la
libmca_ptl_self_la_LDFLAGS = -module -avoid-version

6
src/mca/ptl/self/VERSION Обычный файл
Просмотреть файл

@ -0,0 +1,6 @@
major=1
minor=0
release=0
alpha=0
beta=0
svn=1

10
src/mca/ptl/self/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,10 @@
# -*- shell-script -*-
#
# $HEADER$
#
# Specific to this module
PARAM_INIT_FILE=src/ptl_self.c
PARAM_CONFIG_HEADER_FILE="src/self_config.h"
PARAM_CONFIG_FILES="Makefile src/Makefile"

18
src/mca/ptl/self/src/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,18 @@
# -*- makefile -*-
#
# $HEADER$
#
include $(top_ompi_srcdir)/config/Makefile.options
AM_CPPFLAGS = \
-I$(top_ompi_builddir)/src/include \
-I$(top_ompi_builddir)/src/ompi/event \
-I$(top_ompi_srcdir)/src \
-I$(top_ompi_srcdir)/src/include
noinst_LTLIBRARIES = libmca_ptl_self.la
libmca_ptl_self_la_SOURCES = \
ptl_self.c \
ptl_self.h \
ptl_self_module.c

189
src/mca/ptl/self/src/ptl_self.c Обычный файл
Просмотреть файл

@ -0,0 +1,189 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* $HEADER$
*/
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include "constants.h"
#include "event/event.h"
#include "util/argv.h"
#include "util/output.h"
#include "mca/pml/pml.h"
#include "mca/ptl/ptl.h"
#include "mca/ptl/base/ptl_base_sendreq.h"
#include "mca/ptl/base/ptl_base_recvfrag.h"
#include "mca/base/mca_base_param.h"
#include "mca/pml/teg/src/pml_teg_sendreq.h"
#include "ptl_self.h"
/*
 * The single PTL instance exported by the self module. The initializer is
 * positional, so the order of the entries below must follow the field order
 * of mca_ptl_t exactly.
 */
mca_ptl_t mca_ptl_self = {
&mca_ptl_self_module.super, /* module that owns this PTL */
0, /* ptl_frag_first_size */
0, /* ptl_frag_min_size */
0, /* ptl_frag_max_size */
65535, /* ptl_exclusivity */
0, /* ptl_latency */
0, /* ptl_bandwidth */
MCA_PTL_PUT, /* ptl flags */
mca_ptl_self_add_proc, /* add procs */
mca_ptl_self_del_proc, /* delete procs */
mca_ptl_self_finalize, /* finalize */
mca_ptl_self_send, /* put */
NULL, /* get */
mca_ptl_self_matched, /* matched */
mca_ptl_self_request_alloc, /* allocate a send request */
mca_ptl_self_request_return, /* give a send request back */
NULL, /* match */
/* NOTE(review): the two entries below are presumably ptl_send_progress and
 * ptl_recv_progress. ptl_match, ptl_send_progress and ptl_recv_progress are
 * all called through the ptl pointer in this file, so something outside this
 * file has to fill them in before the first send - confirm against mca_ptl_t. */
NULL,
NULL
};
/* Redundant: ptl_self.h, included above, already declares this symbol. */
extern mca_ptl_self_module_1_0_0_t mca_ptl_self_module ;
/*
 * Flag the local process as reachable through the self PTL.
 *
 * The local proc is looked up with ompi_proc_local() and cached in
 * mca_ptl_self_module.self_local. Every entry of ompi_proc that is the
 * very same proc instance gets its bit set in the reachable bitmap.
 *
 * ptl        (IN)  PTL instance (not used).
 * nprocs     (IN)  number of entries in ompi_proc.
 * ompi_proc  (IN)  array of procs to examine.
 * peer_ret   (OUT) left untouched: the self PTL keeps no per-peer data.
 * reachable  (OUT) bitmap, indexed like ompi_proc, of reachable procs.
 *
 * Always returns OMPI_SUCCESS.
 */
int mca_ptl_self_add_proc(struct mca_ptl_t* ptl, size_t nprocs, struct ompi_proc_t **ompi_proc, struct mca_ptl_base_peer_t** peer_ret, ompi_bitmap_t* reachable)
{
    /* same type as nprocs, so the loop bound is never compared across
     * signedness and cannot overflow before reaching nprocs */
    size_t i;

    mca_ptl_self_module.self_local = ompi_proc_local();

    for( i = 0; i < nprocs; i++ ) {
        if( ompi_proc[i] == mca_ptl_self_module.self_local ) {
            /* the original code passed an int index; keep that argument type */
            ompi_bitmap_set_bit( reachable, (int)i );
        }
    }
    return OMPI_SUCCESS;
}
/*
 * Remove procs from the self PTL. Nothing is stored per proc, so there is
 * nothing to release: all arguments are ignored and the call always
 * returns OMPI_SUCCESS.
 */
int mca_ptl_self_del_proc(struct mca_ptl_t* ptl, size_t nprocs, struct ompi_proc_t **proc, struct mca_ptl_base_peer_t** ptl_peer)
{
    (void)ptl;
    (void)nprocs;
    (void)proc;
    (void)ptl_peer;

    return OMPI_SUCCESS;
}
/*
 * Called once, before the module is unloaded. This PTL instance has no
 * state of its own to tear down, so the call only reports success.
 */
int mca_ptl_self_finalize(struct mca_ptl_t* ptl)
{
    (void)ptl;

    return OMPI_SUCCESS;
}
/*
 * Take a send request from the module's free list and hand it to the caller.
 *
 * ptl      (IN)  PTL instance, recorded as the owner of the request.
 * request  (OUT) the allocated request, or NULL when none could be obtained.
 *
 * Returns the status code produced by OMPI_FREE_LIST_GET.
 */
int mca_ptl_self_request_alloc(struct mca_ptl_t* ptl, struct mca_ptl_base_send_request_t** request)
{
    int rc;
    mca_ptl_base_send_request_t* sendreq;
    ompi_list_item_t* send_item;

    OMPI_FREE_LIST_GET( &mca_ptl_self_module.self_send_requests, send_item, rc );

    /* Never touch the item when the free list came back empty-handed:
     * report the failure through rc instead of dereferencing NULL. */
    if( NULL == send_item ) {
        *request = NULL;
        return rc;
    }

    sendreq = (mca_ptl_base_send_request_t*)send_item;
    sendreq->req_owner = ptl;
    *request = sendreq;
    return rc;
}
void mca_ptl_self_request_return(struct mca_ptl_t* ptl, struct mca_ptl_base_send_request_t* request)
{
OMPI_FREE_LIST_RETURN( &mca_ptl_self_module.self_send_requests, (ompi_list_item_t*)request);
}
/*
* Initiate a send. If this is the first fragment, use the fragment
* descriptor allocated with the send requests, otherwise obtain
* one from the free list. Initialize the fragment and foward
* on to the peer.
*/
int mca_ptl_self_send(
struct mca_ptl_t* ptl,
struct mca_ptl_base_peer_t* ptl_base_peer,
struct mca_ptl_base_send_request_t* request,
size_t offset,
size_t size,
int flags )
{
mca_ptl_self_send_request_t* req = (mca_ptl_self_send_request_t*)request;
mca_ptl_base_header_t* hdr = &(req->req_frag.super.frag_header);
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_MATCH;
hdr->hdr_common.hdr_flags = flags;
hdr->hdr_common.hdr_size = sizeof(mca_ptl_base_match_header_t);
hdr->hdr_frag.hdr_frag_offset = offset;
hdr->hdr_frag.hdr_frag_seq = 0;
hdr->hdr_match.hdr_contextid = request->super.req_comm->c_contextid;
hdr->hdr_match.hdr_src = request->super.req_comm->c_my_rank;
hdr->hdr_match.hdr_dst = request->super.req_peer;
hdr->hdr_match.hdr_tag = request->super.req_tag;
hdr->hdr_match.hdr_msg_length = request->req_bytes_packed;
hdr->hdr_match.hdr_msg_seq = request->super.req_sequence;
hdr->hdr_frag.hdr_frag_length = request->req_bytes_packed;
hdr->hdr_frag.hdr_frag_offset = 0 ;
hdr->hdr_frag.hdr_src_ptr.pval = (void*)req;
req->req_frag.super.frag_peer = ptl_base_peer;
req->req_frag.super.frag_size = request->req_bytes_packed;
req->req_frag.super.frag_owner = &mca_ptl_self;
req->req_frag.frag_request = NULL;
req->req_frag.frag_is_buffered = 0;
ptl->ptl_match( &(req->req_frag), &(hdr->hdr_match) );
return OMPI_SUCCESS;
}
/*
 * A posted receive has been matched with a fragment sent by ourselves.
 * Copy the data from the send buffer to the receive buffer, then report
 * progress on both the send and the receive request.
 *
 * ptl   (IN) PTL instance; its ptl_send_progress and ptl_recv_progress
 *            callbacks are invoked.
 * frag  (IN) the matched receive fragment. Its header carries a pointer to
 *            the originating send request (hdr_src_ptr.pval), and
 *            frag_request is the matching receive request.
 */
void mca_ptl_self_matched( mca_ptl_t* ptl,
mca_ptl_base_recv_frag_t* frag)
{
/* recover the send request stored in the header by mca_ptl_self_send() */
mca_ptl_self_send_request_t* sendreq = (mca_ptl_self_send_request_t*)(frag->super.frag_header.hdr_frag.hdr_src_ptr.pval);
mca_ptl_base_recv_request_t* recvreq = frag->frag_request;
/* temporary send fragment, only used to report send progress below */
mca_ptl_base_send_frag_t sendfrag;
/* Do both sides use the same datatype? If yes we can use an optimized
 * copy function, if not we have to go through a temporary buffer and
 * pack/unpack the data.
 */
if( sendreq->super.super.req_datatype == recvreq->super.req_datatype ) {
/* NOTE(review): only the receive count is used here; this looks like it
 * assumes the send count is not smaller than the receive count - confirm. */
ompi_ddt_copy_content_same_ddt( recvreq->super.req_datatype, recvreq->super.req_count,
recvreq->super.req_addr, sendreq->super.super.req_addr );
} else {
ompi_convertor_t *pSendConvertor, *pRecvConvertor;
struct iovec iov[1];
int completed, iov_count, length;
char* buf;
/* We use a temporary buffer as it looks to be faster on most architectures.
 * NOTE(review): the size is hard-coded to 64 KiB although the module
 * registers a "buffer_size" parameter (self_buf_size) - confirm which one
 * is intended. */
length = 64 * 1024;
/* NOTE(review): the malloc result is not checked before it is used. */
buf = malloc( length * sizeof(char) );
ompi_convertor_init_for_recv( &(frag->super.frag_convertor), 0, recvreq->super.req_datatype,
recvreq->super.req_count, recvreq->super.req_addr, 0 );
pSendConvertor = &(sendreq->super.req_convertor);
pRecvConvertor = &(frag->super.frag_convertor);
completed = 0;
/* pack one buffer worth of data, then unpack it on the receive side; the
 * loop stops as soon as either call returns a non-zero value */
while( !completed ) {
iov[0].iov_base = buf;
iov[0].iov_len = length;
iov_count = 1;
completed |= ompi_convertor_pack( pSendConvertor, iov, iov_count );
completed |= ompi_convertor_unpack( pRecvConvertor, iov, iov_count );
}
free( buf );
}
/* Now let's progress the requests */
sendfrag.frag_request = &(sendreq->super);
/* sendfrag.super.frag_header is left uninitialized */
sendfrag.super.frag_owner = &mca_ptl_self;
sendfrag.super.frag_peer = NULL;
sendfrag.super.frag_addr = NULL;
sendfrag.super.frag_size = sendreq->super.req_bytes_packed;
ptl->ptl_send_progress( &(sendreq->super), &(sendfrag) );
ptl->ptl_recv_progress( recvreq, frag );
}

86
src/mca/ptl/self/src/ptl_self.h Обычный файл
Просмотреть файл

@ -0,0 +1,86 @@
/*
*$HEADER$
*/
/**
* @file
*/
#ifndef PTL_SELF_H_HAS_BEEN_INCLUDED
#define PTL_SELF_H_HAS_BEEN_INCLUDED
#include "mem/free_list.h"
#include "event/event.h"
#include "mca/pml/pml.h"
#include "mca/ptl/ptl.h"
#include "mca/ptl/base/ptl_base_recvreq.h"
#include "mca/ptl/base/ptl_base_recvfrag.h"
/**
* SELF PTL module: the base PTL module extended with the state needed to
* deliver messages a process sends to itself.
*/
struct mca_ptl_self_module_1_0_0_t {
mca_ptl_base_module_1_0_0_t super; /**< base PTL module */
struct mca_ptl_t** self_ptls; /**< array of available PTLs, allocated by the module init function */
u_int32_t self_num_ptls; /**< number of ptls actually used */
u_int32_t self_max_ptls; /**< maximum number of ptls */
u_int32_t self_buf_size; /**< the size of the internal buffer used to pack/unpack the data ("buffer_size" parameter) */
u_int32_t self_is_non_blocking; /**< how the memcopy operations are done, segmented or not ("nonblocking" parameter) */
int32_t self_free_list_num; /**< initial size of free lists */
int32_t self_free_list_max; /**< maximum size of free lists */
int32_t self_free_list_inc; /**< number of elements to alloc when growing free lists */
ompi_free_list_t self_send_requests; /**< free list of self send requests -- sendreq + recvfrag */
ompi_proc_t* self_local; /**< the self proc instance corresponding to the local process */
};
/* Both names refer to the same structure. */
typedef struct mca_ptl_self_module_1_0_0_t mca_ptl_self_module_1_0_0_t;
typedef struct mca_ptl_self_module_1_0_0_t mca_ptl_self_module_t;
/**
* Self send request derived type. The send request contains both the
* base send request, and the base receive fragment which will be used to do the match.
* The base send request must stay the first member: the code casts between
* mca_ptl_base_send_request_t* and mca_ptl_self_send_request_t*.
*/
struct mca_ptl_self_send_request_t {
mca_ptl_base_send_request_t super; /**< base send request */
mca_ptl_base_recv_frag_t req_frag; /**< receive fragment handed to the match function */
};
typedef struct mca_ptl_self_send_request_t mca_ptl_self_send_request_t;
OBJ_CLASS_DECLARATION(mca_ptl_self_send_request_t);
extern mca_ptl_self_module_1_0_0_t mca_ptl_self_module;
/**
* Register SELF module parameters with the MCA framework
*/
extern int mca_ptl_self_module_open(void);
/**
* Any final cleanup before being unloaded.
*/
extern int mca_ptl_self_module_close(void);
/**
* SELF module initialization.
*
* @param num_ptls (OUT) Number of PTLs returned in PTL array.
* @param allow_multi_user_threads (OUT) Flag indicating whether PTL supports user threads (TRUE)
* @param have_hidden_threads (OUT) Flag indicating whether PTL uses threads (TRUE)
*
* (1) prepare the local buffering and initialize the SELF
* engine.
*/
extern mca_ptl_t** mca_ptl_self_module_init(
int *num_ptls,
bool *allow_multi_user_threads,
bool *have_hidden_threads
);
/** Mark the local process, if present in ompi_proc, as reachable in the bitmap. */
int mca_ptl_self_add_proc(struct mca_ptl_t* ptl, size_t nprocs, struct ompi_proc_t **ompi_proc, struct mca_ptl_base_peer_t** peer_ret, ompi_bitmap_t* reachable);
/** Remove procs; nothing is kept per proc, always returns OMPI_SUCCESS. */
int mca_ptl_self_del_proc(struct mca_ptl_t* ptl, size_t nprocs, struct ompi_proc_t **proc, struct mca_ptl_base_peer_t** ptl_peer);
/** Called before the module is unloaded; always returns OMPI_SUCCESS. */
int mca_ptl_self_finalize(struct mca_ptl_t* ptl);
/** Get a send request from the module's free list of send requests. */
int mca_ptl_self_request_alloc(struct mca_ptl_t* ptl, struct mca_ptl_base_send_request_t** request);
/** Give a send request back to the module's free list of send requests. */
void mca_ptl_self_request_return(struct mca_ptl_t* ptl, struct mca_ptl_base_send_request_t* request);
/** Build the match header for the request and hand its fragment to ptl->ptl_match(). */
int mca_ptl_self_send( struct mca_ptl_t* ptl, struct mca_ptl_base_peer_t* ptl_base_peer, struct mca_ptl_base_send_request_t* request,
size_t offset, size_t size, int flags );
/** Copy the data of a matched fragment and progress the send and receive requests. */
void mca_ptl_self_matched( mca_ptl_t* ptl, mca_ptl_base_recv_frag_t* frag );
#endif /* PTL_SELF_H_HAS_BEEN_INCLUDED */

153
src/mca/ptl/self/src/ptl_self_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,153 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* $HEADER$
*/
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include "constants.h"
#include "event/event.h"
#include "util/if.h"
#include "util/argv.h"
#include "util/output.h"
#include "mca/pml/pml.h"
#include "mca/ptl/ptl.h"
#include "mca/ptl/base/ptl_base_sendreq.h"
#include "mca/ptl/base/ptl_base_recvfrag.h"
#include "mca/base/mca_base_param.h"
#include "mca/base/mca_base_module_exchange.h"
#include "ptl_self.h"
/*
 * The self PTL module descriptor. Only the leading base module part is
 * initialized explicitly; the remaining members of the structure are
 * zero-initialized and get their values in the open and init functions.
 */
mca_ptl_self_module_1_0_0_t mca_ptl_self_module = {
{
/* First, the mca_base_module_t struct containing meta information
about the module itself */
{
/* Indicate that we are a ptl v1.0.0 module (which also implies a
specific MCA version) */
MCA_PTL_BASE_VERSION_1_0_0,
"self", /* MCA module name */
1, /* MCA module major version */
0, /* MCA module minor version */
0, /* MCA module release version */
mca_ptl_self_module_open, /* module open */
mca_ptl_self_module_close /* module close */
},
/* Next the MCA v1.0.0 module meta data */
{
/* Whether the module is checkpointable or not */
true
},
mca_ptl_self_module_init, /* module init */
/* NOTE(review): the meaning of the two remaining entries is not visible
 * here - confirm against mca_ptl_base_module_1_0_0_t. */
NULL,
NULL,
}
};
/* Constructor: set up the receive fragment embedded in the send request. */
static void mca_ptl_self_send_request_construct(mca_ptl_self_send_request_t* request)
{
OBJ_CONSTRUCT(&request->req_frag, mca_ptl_base_recv_frag_t);
}
/* Destructor: tear down the receive fragment embedded in the send request. */
static void mca_ptl_self_send_request_destruct(mca_ptl_self_send_request_t* request)
{
OBJ_DESTRUCT(&request->req_frag);
}
/* Class instance for the self send request, with
 * mca_ptl_base_send_request_t as parent class and the constructor and
 * destructor defined above. */
OBJ_CLASS_INSTANCE( mca_ptl_self_send_request_t, mca_ptl_base_send_request_t,
mca_ptl_self_send_request_construct,
mca_ptl_self_send_request_destruct );
/*
 * Utility routine for parameter registration.
 *
 * Registers the integer parameter param_name for the "ptl" / "self"
 * module with the given default, looks it up again and returns the
 * value found. The result starts out as default_value, so that is what
 * comes back if the lookup does not write anything.
 */
static inline int mca_ptl_self_param_register_int(
    const char* param_name,
    int default_value )
{
    int value = default_value;
    int index = mca_base_param_register_int( "ptl", "self", param_name, NULL, default_value );

    mca_base_param_lookup_int( index, &value );
    return value;
}
/*
 * Called by the MCA framework to open the module: resets the PTL
 * bookkeeping and registers the module parameters "buffer_size"
 * (default 64 KiB) and "nonblocking" (default 1).
 *
 * Always returns OMPI_SUCCESS.
 */
int mca_ptl_self_module_open(void)
{
    int buffer_size;
    int nonblocking;

    /* no PTL exists until the init function has run */
    mca_ptl_self_module.self_ptls = NULL;
    mca_ptl_self_module.self_num_ptls = 0;

    /* SELF module parameters, registered in this order */
    buffer_size = mca_ptl_self_param_register_int( "buffer_size", 64*1024 );
    mca_ptl_self_module.self_buf_size = buffer_size;

    nonblocking = mca_ptl_self_param_register_int( "nonblocking", 1 );
    mca_ptl_self_module.self_is_non_blocking = nonblocking;

    return OMPI_SUCCESS;
}
/*
 * Final cleanup before the module is unloaded.
 *
 * Reports send requests that were never returned to the free list, then
 * releases the PTL array and destructs the free list.
 *
 * The free list is only constructed by mca_ptl_self_module_init() after
 * the PTL array has been allocated, so a NULL array means there is
 * nothing to destruct. This also makes a second call harmless.
 *
 * Always returns OMPI_SUCCESS.
 */
int mca_ptl_self_module_close(void)
{
    if( NULL == mca_ptl_self_module.self_ptls ) {
        /* init never completed (or close already ran): nothing to undo */
        return OMPI_SUCCESS;
    }

    /* NOTE(review): both counters are printed with %d - confirm that this
     * matches their declared types. */
    if (mca_ptl_self_module.self_send_requests.fl_num_allocated !=
        mca_ptl_self_module.self_send_requests.super.ompi_list_length) {
        ompi_output(0, "self send requests: %d allocated %d returned\n",
                    mca_ptl_self_module.self_send_requests.fl_num_allocated,
                    mca_ptl_self_module.self_send_requests.super.ompi_list_length);
    }

    free(mca_ptl_self_module.self_ptls);
    /* do not leave a dangling pointer behind */
    mca_ptl_self_module.self_ptls = NULL;
    mca_ptl_self_module.self_num_ptls = 0;

    OBJ_DESTRUCT( &(mca_ptl_self_module.self_send_requests) );
    return OMPI_SUCCESS;
}
extern mca_ptl_t mca_ptl_self;
/*
 * SELF module initialization.
 *
 * Allocates the one-entry PTL array holding &mca_ptl_self, sets the free
 * list sizing (4 initial elements, no maximum, grow by 4) and creates the
 * free list of self send requests.
 *
 * num_ptls                  (OUT) 1 on success, 0 if the array allocation failed.
 * allow_multi_user_threads  (OUT) always set to true.
 * have_hidden_threads       (OUT) always set to false.
 *
 * Returns the PTL array, or NULL if it could not be allocated.
 *
 * NOTE(review): the result of ompi_free_list_init() is not examined -
 * confirm whether it can fail and how that should be reported.
 */
mca_ptl_t** mca_ptl_self_module_init(int *num_ptls,
                                     bool *allow_multi_user_threads,
                                     bool *have_hidden_threads)
{
    *num_ptls = 0;
    *allow_multi_user_threads = true;
    *have_hidden_threads = false;

    mca_ptl_self_module.self_ptls = malloc(sizeof(mca_ptl_t*));
    if( NULL == mca_ptl_self_module.self_ptls ) {
        return NULL;
    }

    /* exactly one PTL: the self PTL itself */
    mca_ptl_self_module.self_ptls[0] = &mca_ptl_self;
    mca_ptl_self_module.self_max_ptls = 1;
    mca_ptl_self_module.self_num_ptls = 1;
    *num_ptls = 1;

    /* free list sizing */
    mca_ptl_self_module.self_free_list_num = 4;
    mca_ptl_self_module.self_free_list_inc = 4;
    mca_ptl_self_module.self_free_list_max = -1;

    /* free list of send requests, each carrying its own receive fragment */
    OBJ_CONSTRUCT(&mca_ptl_self_module.self_send_requests, ompi_free_list_t);
    ompi_free_list_init(&mca_ptl_self_module.self_send_requests,
                        sizeof(mca_ptl_self_send_request_t),
                        OBJ_CLASS(mca_ptl_self_send_request_t),
                        mca_ptl_self_module.self_free_list_num,
                        mca_ptl_self_module.self_free_list_max,
                        mca_ptl_self_module.self_free_list_inc,
                        NULL); /* use default allocator */

    return mca_ptl_self_module.self_ptls;
}

0
src/mca/ptl/self/src/self_config.h Обычный файл
Просмотреть файл