1
1

First take at a BTL for Portals 4

This commit was SVN r28759.
Этот коммит содержится в:
Brian Barrett 2013-07-11 20:47:08 +00:00
родитель 9252afdcd9
Коммит bea54eeeb1
12 изменённых файлов: 2370 добавлений и 0 удалений

56
ompi/mca/btl/portals4/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,56 @@
#
# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2010-2012 Sandia National Laboratories. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
# Preprocessor flags needed to compile against Portals 4; the value of
# btl_portals4_CPPFLAGS is defined outside this file.
AM_CPPFLAGS = $(btl_portals4_CPPFLAGS)
# Exactly one of the two library names below is non-empty, depending on
# whether the component is built as a DSO or as a static convenience
# library.
if MCA_BUILD_ompi_btl_portals4_DSO
component_noinst =
component_install = mca_btl_portals4.la
else
component_noinst = libmca_btl_portals4.la
component_install =
endif
# Sources shared by both the DSO and the static flavor of the component.
local_sources = \
btl_portals4.c \
btl_portals4.h \
btl_portals4_component.c \
btl_portals4_frag.c \
btl_portals4_frag.h \
btl_portals4_rdma.c \
btl_portals4_recv.c \
btl_portals4_recv.h \
btl_portals4_send.c
# DSO flavor: installed into $(pkglibdir).
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_btl_portals4_la_SOURCES = $(local_sources)
mca_btl_portals4_la_LIBADD = \
$(btl_portals4_LIBS)
mca_btl_portals4_la_LDFLAGS = -module -avoid-version $(btl_portals4_LDFLAGS)
# Static flavor: built but not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_btl_portals4_la_SOURCES = $(local_sources)
libmca_btl_portals4_la_LIBADD = $(btl_portals4_LIBS)
libmca_btl_portals4_la_LDFLAGS = -module -avoid-version $(btl_portals4_LDFLAGS)

453
ompi/mca/btl/portals4/btl_portals4.c Обычный файл
Просмотреть файл

@ -0,0 +1,453 @@
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010-2012 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <inttypes.h>
#include "ompi/runtime/ompi_module_exchange.h"
#include "opal/class/opal_bitmap.h"
#include "ompi/constants.h"
#include "ompi/mca/btl/btl.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/proc/proc.h"
#include "btl_portals4.h"
#include "btl_portals4_recv.h"
/*
 * The single, statically allocated Portals 4 BTL module instance.
 *
 * The numeric tunables are zero here; component_open() assigns the
 * real values (eager limit, max send size, exclusivity, flags, segment
 * size, bandwidth, latency).  NULL entries in the function table mark
 * operations this BTL does not provide (btl_register, sendi, put).
 * Members of mca_btl_portals4_module_t that follow the base module
 * are not listed and are therefore zero-initialized.
 */
mca_btl_portals4_module_t mca_btl_portals4_module = {
{
&mca_btl_portals4_component.super,
/* NOTE: All these default values are set in
component_open() */
0, /* max size of first frag */
0, /* min send size */
0, /* max send size */
0, /* btl_rdma_pipeline_send_length */
0, /* btl_rdma_pipeline_frag_size */
0, /* btl_min_rdma_pipeline_size */
0, /* exclusivity - higher than sm, lower than self */
0, /* latency */
0, /* bandwidth */
0, /* btl flags */
0, /* btl segment size */
mca_btl_portals4_add_procs,
mca_btl_portals4_del_procs,
NULL, /* btl_register */
mca_btl_portals4_finalize,
mca_btl_portals4_alloc,
mca_btl_portals4_free,
mca_btl_portals4_prepare_src,
mca_btl_portals4_prepare_dst,
mca_btl_portals4_send,
NULL, /* mca_btl_portals4_sendi, */
NULL, /* mca_btl_portals4_put, */
mca_btl_portals4_get,
mca_btl_base_dump,
NULL, /* mpool */
NULL, /* register error */
NULL
},
};
/**
 * Register a set of peer processes with the Portals 4 BTL.
 *
 * For every peer whose architecture matches the local process, the
 * peer's Portals process id is fetched through the modex, an endpoint
 * is allocated and stored in btl_peer_data[i], the module's process
 * count is incremented and bit i of the reachable bitmap is set.
 * Peers with a different architecture are skipped: their
 * btl_peer_data slot is not written and they are not counted.
 *
 * If the module had no peers on entry and gained at least one, the
 * receive side is enabled.  A failure to enable it is reported to the
 * caller instead of being silently dropped.
 *
 * @param btl_base      (IN)  BTL module (must be the global module)
 * @param nprocs        (IN)  number of entries in procs / btl_peer_data
 * @param procs         (IN)  processes to add
 * @param btl_peer_data (OUT) per-process endpoints
 * @param reachable     (OUT) bitmap of processes reachable through this BTL
 *
 * @return OMPI_SUCCESS, the modex error code, or OMPI_ERROR
 */
int
mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
                           size_t nprocs,
                           struct ompi_proc_t **procs,
                           struct mca_btl_base_endpoint_t** btl_peer_data,
                           opal_bitmap_t* reachable)
{
    int ret;
    struct ompi_proc_t *curr_proc = NULL;
    ptl_process_t *id;
    size_t i, size;
    bool need_activate = false;

    opal_output_verbose(50, ompi_btl_base_framework.framework_output,
                        "mca_btl_portals4_add_procs: Adding %d procs (%d)", (int) nprocs,
                        (int) mca_btl_portals4_module.portals_num_procs);

    assert(&mca_btl_portals4_module == (mca_btl_portals4_module_t*) btl_base);

    /* the receive side only needs to be enabled for the first peers */
    if (0 == mca_btl_portals4_module.portals_num_procs) {
        need_activate = true;
    }

    for (i = 0 ; i < nprocs ; ++i) {
        curr_proc = procs[i];

        /* portals doesn't support heterogeneous yet... */
        if (ompi_proc_local()->proc_arch != curr_proc->proc_arch) {
            continue;
        }

        ret = ompi_modex_recv(&mca_btl_portals4_component.super.btl_version,
                              curr_proc, (void**) &id, &size);
        if (OMPI_SUCCESS != ret) {
            opal_output(0, "ompi_modex_recv failed: %d", ret);
            return ret;
        } else if (sizeof(ptl_process_t) != size) {
            opal_output(0, "ompi_modex_recv returned size %d, expected %d",
                        (int) size, (int) sizeof(ptl_process_t));
            return OMPI_ERROR;
        }

        btl_peer_data[i] = malloc(sizeof(mca_btl_base_endpoint_t));
        if (NULL == btl_peer_data[i]) return OMPI_ERROR;
        btl_peer_data[i]->ptl_proc = *id;

        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "add_procs: nid=%x pid=%x\n", id->phys.nid, id->phys.pid));

        OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_num_procs, 1);

        /* and here we can reach */
        opal_bitmap_set_bit(reachable, i);
    }

    if (need_activate && mca_btl_portals4_module.portals_num_procs > 0) {
        ret = mca_btl_portals4_recv_enable(&mca_btl_portals4_module);
        if (OMPI_SUCCESS != ret) {
            /* without posted receive buffers the peers cannot deliver
               anything to us, so do not pretend the BTL is usable */
            opal_output(0, "mca_btl_portals4_recv_enable failed: %d", ret);
            return OMPI_ERROR;
        }
    }

    return OMPI_SUCCESS;
}
/**
 * Remove a set of peer processes from the Portals 4 BTL.
 *
 * Each non-NULL endpoint is freed, its slot is cleared and the module's
 * process count is decremented.  NULL slots are skipped so that the
 * count stays in step with add_procs(), which only counts the peers it
 * actually allocated an endpoint for.
 *
 * @param btl           (IN)     BTL module (must be the global module)
 * @param nprocs        (IN)     number of entries in procs / btl_peer_data
 * @param procs         (IN)     processes to remove (not inspected here)
 * @param btl_peer_data (IN/OUT) endpoints to release; released slots become NULL
 *
 * @return OMPI_SUCCESS
 */
int
mca_btl_portals4_del_procs(struct mca_btl_base_module_t *btl,
                           size_t nprocs,
                           struct ompi_proc_t **procs,
                           struct mca_btl_base_endpoint_t **btl_peer_data)
{
    size_t i;

    assert(&mca_btl_portals4_module == (mca_btl_portals4_module_t*) btl);

    opal_output_verbose(50, ompi_btl_base_framework.framework_output,
                        "mca_btl_portals4_del_procs: Removing %d procs (%d)", (int) nprocs,
                        (int) mca_btl_portals4_module.portals_num_procs);

    for (i = 0 ; i < nprocs ; ++i) {
        /* nothing was allocated or counted for this slot */
        if (NULL == btl_peer_data[i]) {
            continue;
        }
        free(btl_peer_data[i]);
        btl_peer_data[i] = NULL;
        OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_num_procs, -1);
    }

    return OMPI_SUCCESS;
}
/**
 * Allocate a send descriptor of (up to) the requested size.
 *
 * Requests that fit within the eager limit are served from the eager
 * fragment list; larger ones come from the max-send list and are
 * clamped to btl_max_send_size.
 *
 * @param btl_base (IN) BTL module (must be the global module)
 * @param endpoint (IN) peer (not used here)
 * @param order    (IN) requested ordering (not used; MCA_BTL_NO_ORDER is set)
 * @param size     (IN) requested payload size in bytes
 * @param flags    (IN) descriptor flags; SEND_ALWAYS_CALLBACK is OR-ed in
 *
 * @return the descriptor, or NULL if no fragment is available
 */
mca_btl_base_descriptor_t*
mca_btl_portals4_alloc(struct mca_btl_base_module_t* btl_base,
                       struct mca_btl_base_endpoint_t* endpoint,
                       uint8_t order,
                       size_t size,
                       uint32_t flags)
{
    mca_btl_portals4_frag_t* frag;
    size_t granted = size;

    assert(&mca_btl_portals4_module == (mca_btl_portals4_module_t*) btl_base);

    if (size > mca_btl_portals4_module.super.btl_eager_limit) {
        OMPI_BTL_PORTALS4_FRAG_ALLOC_MAX(&mca_btl_portals4_module, frag);
        /* a max fragment never carries more than btl_max_send_size */
        if (granted > mca_btl_portals4_module.super.btl_max_send_size) {
            granted = mca_btl_portals4_module.super.btl_max_send_size;
        }
    } else {
        OMPI_BTL_PORTALS4_FRAG_ALLOC_EAGER(&mca_btl_portals4_module, frag);
    }

    if (NULL == frag) {
        return NULL;
    }

    frag->segments[0].base.seg_len = granted;
    frag->md_h = PTL_INVALID_HANDLE;
    frag->base.des_src_cnt = 1;
    frag->base.des_flags = flags | MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
    frag->base.order = MCA_BTL_NO_ORDER;

    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                         "mca_btl_portals4_alloc: %p\n", (void *) &frag->base));

    return &frag->base;
}
/**
 * Return a descriptor to the free list it was taken from.
 *
 * The fragment type selects the list.  For MAX and USER fragments a
 * still-valid match entry handle is reset to PTL_INVALID_HANDLE; USER
 * fragments additionally give back their slot in
 * portals_outstanding_ops.
 *
 * @param btl_base (IN) BTL module (must be the global module)
 * @param des      (IN) descriptor to release
 *
 * @return OMPI_SUCCESS, or OMPI_ERR_BAD_PARAM for an unknown fragment type
 */
int
mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base,
                      mca_btl_base_descriptor_t* des)
{
    mca_btl_portals4_frag_t* frag = (mca_btl_portals4_frag_t*) des;

    assert(&mca_btl_portals4_module == (mca_btl_portals4_module_t*) btl_base);

    if (BTL_PORTALS4_FRAG_TYPE_EAGER == frag->type) {
        /* don't ever unlink eager frags */
        OMPI_BTL_PORTALS4_FRAG_RETURN_EAGER(&mca_btl_portals4_module.super, frag);
        return OMPI_SUCCESS;
    }

    if (BTL_PORTALS4_FRAG_TYPE_MAX == frag->type) {
        if (PTL_INVALID_HANDLE != frag->me_h) {
            frag->me_h = PTL_INVALID_HANDLE;
        }
        OMPI_BTL_PORTALS4_FRAG_RETURN_MAX(&mca_btl_portals4_module.super, frag);
        return OMPI_SUCCESS;
    }

    if (BTL_PORTALS4_FRAG_TYPE_USER == frag->type) {
        if (PTL_INVALID_HANDLE != frag->me_h) {
            frag->me_h = PTL_INVALID_HANDLE;
        }
        /* the slot was reserved in prepare_src / prepare_dst */
        OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                             "mca_btl_portals4_free: Decrementing portals_outstanding_ops=%d\n", mca_btl_portals4_module.portals_outstanding_ops));
        OMPI_BTL_PORTALS4_FRAG_RETURN_USER(&mca_btl_portals4_module.super, frag);
        return OMPI_SUCCESS;
    }

    return OMPI_ERR_BAD_PARAM;
}
/**
 * Pack data and return a descriptor that can be
 * used for send/put.
 *
 * Two paths exist:
 *  - If header space is reserved or the convertor needs intermediate
 *    buffers, a fragment is obtained from mca_btl_portals4_alloc() and
 *    the data is packed into it behind the reserved bytes.  If packing
 *    fails the fragment is handed back before NULL is returned.
 *  - Otherwise the user's buffer is exposed directly: a slot in
 *    portals_outstanding_ops is reserved, a USER fragment is taken, a
 *    fresh RDMA key is assigned and a use-once match entry accepting
 *    GET from the peer is appended on the receive portal table index.
 *
 * @param btl_base     (IN)     BTL module
 * @param peer         (IN)     BTL peer addressing
 * @param registration (IN)     not used here
 * @param convertor    (IN)     description of the data to send
 * @param order        (IN)     not used here; MCA_BTL_NO_ORDER is set
 * @param reserve      (IN)     bytes to leave free at the start of the fragment
 * @param size         (IN/OUT) in: bytes requested; out: bytes packed
 *                              (only updated on the packing path)
 * @param flags        (IN)     descriptor flags; SEND_ALWAYS_CALLBACK is OR-ed in
 *
 * @return the descriptor, or NULL on failure
 */
mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
                             struct mca_btl_base_endpoint_t* peer,
                             mca_mpool_base_registration_t* registration,
                             struct opal_convertor_t* convertor,
                             uint8_t order,
                             size_t reserve,
                             size_t* size,
                             uint32_t flags)
{
    mca_btl_portals4_frag_t* frag;
    size_t max_data = *size;
    struct iovec iov;
    uint32_t iov_count = 1;
    int ret;

    /* size_t values are cast so they match the %ld conversions */
    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                         "mca_btl_portals4_prepare_src reserve=%ld size=%ld max_data=%ld\n",
                         (long) reserve, (long) *size, (long) max_data));

    if (0 != reserve || 0 != opal_convertor_need_buffers(convertor)) {
        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "mca_btl_portals4_prepare_src NEED BUFFERS or RESERVE\n"));
        frag = (mca_btl_portals4_frag_t*) mca_btl_portals4_alloc(btl_base, peer, MCA_BTL_NO_ORDER, max_data + reserve, flags);
        if (NULL == frag) {
            return NULL;
        }

        /* never pack more than the fragment can hold after the header */
        if (max_data + reserve > frag->size) {
            max_data = frag->size - reserve;
        }
        iov.iov_len = max_data;
        iov.iov_base = (unsigned char*) frag->segments[0].base.seg_addr.pval + reserve;
        ret = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );

        *size = max_data;
        if ( ret < 0 ) {
            /* give the fragment back instead of leaking it */
            mca_btl_portals4_free(btl_base, &frag->base);
            return NULL;
        }

        frag->segments[0].base.seg_len = max_data + reserve;
        frag->base.des_src_cnt = 1;
    } else {
        /* no need to pack - rdma operation out of user's buffer */
        ptl_me_t me;

        /* reserve space in the event queue for rdma operations immediately */
        while (OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, 1) >
               mca_btl_portals4_module.portals_max_outstanding_ops) {
            OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
            OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n"));
            mca_btl_portals4_component_progress();
        }

        OMPI_BTL_PORTALS4_FRAG_ALLOC_USER(&mca_btl_portals4_module.super, frag);
        if (NULL == frag){
            OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
            return NULL;
        }
        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                             "mca_btl_portals4_prepare_src: Incrementing portals_outstanding_ops=%d\n", mca_btl_portals4_module.portals_outstanding_ops));

        /* a NULL iov_base asks the convertor for a pointer to the data
           rather than a copy */
        iov.iov_len = max_data;
        iov.iov_base = NULL;

        opal_convertor_pack(convertor, &iov, &iov_count, &max_data );

        frag->segments[0].base.seg_len = max_data;
        frag->segments[0].base.seg_addr.pval = iov.iov_base;
        frag->segments[0].key = OPAL_THREAD_ADD64(&(mca_btl_portals4_module.portals_rdma_key), 1);
        frag->base.des_src_cnt = 1;

        /* either a put or get. figure out which later */
        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                             "rdma src posted for frag 0x%lx, callback 0x%lx, bits %"PRIu64", flags say %d" ,
                             (unsigned long) frag,
                             (unsigned long) frag->base.des_cbfunc,
                             frag->segments[0].key, flags));

        /* create a match entry */
        me.start = frag->segments[0].base.seg_addr.pval;
        me.length = frag->segments[0].base.seg_len;
        me.ct_handle = PTL_CT_NONE;
        me.min_free = 0;
        me.uid = PTL_UID_ANY;
        me.options = PTL_ME_OP_GET | PTL_ME_USE_ONCE |
            PTL_ME_EVENT_LINK_DISABLE |
            PTL_ME_EVENT_COMM_DISABLE |
            PTL_ME_EVENT_UNLINK_DISABLE;
        me.match_id.phys.nid = peer->ptl_proc.phys.nid;
        me.match_id.phys.pid = peer->ptl_proc.phys.pid;
        me.match_bits = frag->segments[0].key;
        /* every bit of the key must match (an earlier assignment of the
           protocol/context/source masks was immediately overwritten by
           this one and has been dropped) */
        me.ignore_bits = 0;

        ret = PtlMEAppend(mca_btl_portals4_module.portals_ni_h,
                          mca_btl_portals4_module.recv_idx,
                          &me,
                          PTL_PRIORITY_LIST,
                          frag,
                          &(frag->me_h));
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                                "%s:%d: PtlMEAppend failed: %d\n",
                                __FILE__, __LINE__, ret);
            OMPI_BTL_PORTALS4_FRAG_RETURN_USER(&mca_btl_portals4_module.super, frag);
            OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
            return NULL;
        }
        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                             "PtlMEAppend (prepare_src) frag=%p, me_h=%d start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
                             (void *)frag, frag->me_h, me.start, (long) me.length,
                             me.match_id.phys.nid, me.match_id.phys.pid, (unsigned long) me.match_bits));
    }

    frag->base.des_src = &frag->segments[0].base;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_flags = flags | MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
    frag->base.order = MCA_BTL_NO_ORDER;

    return &frag->base;
}
/**
 * Build a descriptor describing the local destination buffer of an
 * RDMA operation.
 *
 * A slot in portals_outstanding_ops is reserved first (progressing the
 * component while the limit is exceeded), then a USER fragment is set
 * up to point at the convertor's current position and is given a fresh
 * RDMA key.
 *
 * @param btl_base     (IN) BTL module (not inspected here)
 * @param peer         (IN) BTL peer addressing (only used for debug output)
 * @param registration (IN) not used here
 * @param convertor    (IN) supplies the destination address
 * @param order        (IN) not used here; MCA_BTL_NO_ORDER is set
 * @param reserve      (IN) only used for debug output
 * @param size         (IN) length of the destination segment
 * @param flags        (IN) descriptor flags; SEND_ALWAYS_CALLBACK is OR-ed in
 *
 * @return the descriptor, or NULL if no fragment is available
 */
mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_dst(struct mca_btl_base_module_t* btl_base,
                             struct mca_btl_base_endpoint_t* peer,
                             mca_mpool_base_registration_t* registration,
                             struct opal_convertor_t* convertor,
                             uint8_t order,
                             size_t reserve,
                             size_t* size,
                             uint32_t flags)
{
    mca_btl_portals4_frag_t* frag;

    /* reserve space in the event queue for rdma operations immediately */
    for (;;) {
        if (OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, 1) <=
            mca_btl_portals4_module.portals_max_outstanding_ops) {
            break;
        }
        /* over the limit: undo the reservation and drain some events */
        OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (2)\n"));
        mca_btl_portals4_component_progress();
    }

    OMPI_BTL_PORTALS4_FRAG_ALLOC_USER(&mca_btl_portals4_module.super, frag);
    if (NULL == frag) {
        OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
        return NULL;
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                         "mca_btl_portals4_prepare_dst: Incrementing portals_outstanding_ops=%d\n", mca_btl_portals4_module.portals_outstanding_ops));

    /* describe the destination segment */
    frag->segments[0].base.seg_len = *size;
    opal_convertor_get_current_pointer( convertor, (void**)&(frag->segments[0].base.seg_addr.pval) );
    frag->segments[0].key = OPAL_THREAD_ADD64(&(mca_btl_portals4_module.portals_rdma_key), 1);

    /* destination-only descriptor */
    frag->base.des_dst = &frag->segments[0].base;
    frag->base.des_dst_cnt = 1;
    frag->base.des_src = NULL;
    frag->base.des_src_cnt = 0;
    frag->base.order = MCA_BTL_NO_ORDER;
    frag->base.des_flags = flags | MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
    frag->md_h = PTL_INVALID_HANDLE;

    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                         "mca_btl_portals4_prepare_dst &base=%p reserve=%ld size=%ld pid=%x key=%ld\n",
                         (void *)&frag->base, reserve, *size, peer->ptl_proc.phys.pid, frag->segments[0].key));

    return &frag->base;
}
/**
 * Shut the Portals 4 BTL module down.
 *
 * Progresses the component until no operations are outstanding, then
 * releases the Portals resources held by the module: the long overflow
 * match entry, the zero-length MD, the array of fixed MDs, the receive
 * portal table entry, the receive blocks, the receive event queue and
 * the network interface, and finally calls PtlFini().
 *
 * The number of fixed MDs is computed in 64 bits, matching the type
 * used for the same quantity in component_open(); with a small
 * md_size_bit_width the count does not fit in an int.
 *
 * @param btl (IN) BTL module (must be the global module)
 *
 * @return OMPI_SUCCESS (failures of individual release calls are only
 *         reported through verbose output)
 */
int
mca_btl_portals4_finalize(struct mca_btl_base_module_t *btl)
{
    int ret;

    assert(&mca_btl_portals4_module == (mca_btl_portals4_module_t*) btl);

    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                         "mca_btl_portals4_finalize portals_outstanding_ops=%d\n", mca_btl_portals4_module.portals_outstanding_ops));

    /* sanity check */
    assert(mca_btl_portals4_module.portals_outstanding_ops >= 0);

    /* finalize all communication */
    while (mca_btl_portals4_module.portals_outstanding_ops > 0) {
        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                             "mca_btl_portals4_finalize portals_outstanding_ops: %d",
                             mca_btl_portals4_module.portals_outstanding_ops));

        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (3)\n"));
        mca_btl_portals4_component_progress();
    }

    PtlMEUnlink(mca_btl_portals4_module.long_overflow_me_h);
    PtlMDRelease(mca_btl_portals4_module.zero_md_h);

    if (NULL != mca_btl_portals4_module.fixed_md_h) {
        uint64_t i, fixed_md_nb;

        /* one MD per fixed_md_distance bytes of the 2^48 byte range,
           but always at least one */
        if (MEMORY_MAX_SIZE > mca_btl_portals4_module.fixed_md_distance) {
            fixed_md_nb = MEMORY_MAX_SIZE / mca_btl_portals4_module.fixed_md_distance;
        } else {
            fixed_md_nb = 1;
        }

        for (i = 0; i < fixed_md_nb; i++) {
            PtlMDRelease(mca_btl_portals4_module.fixed_md_h[i]);
        }
        free(mca_btl_portals4_module.fixed_md_h);
        /* do not leave a dangling pointer in the global module */
        mca_btl_portals4_module.fixed_md_h = NULL;
    }

    PtlPTFree(mca_btl_portals4_module.portals_ni_h, mca_btl_portals4_module.recv_idx);

    ret = mca_btl_portals4_recv_disable(&mca_btl_portals4_module);
    if (PTL_OK != ret) OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Error freeing recv list: %d", ret));

    /* destroy eqs */
    ret = PtlEQFree(mca_btl_portals4_module.recv_eq_h);
    if (PTL_OK != ret) OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Error freeing EQ recv: %d", ret));

    ret = PtlNIFini(mca_btl_portals4_module.portals_ni_h);
    if (PTL_OK != ret) OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Error returned by PtlNIFini\n"));
    PtlFini();

    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "mca_btl_portals4_finalize OK\n"));

    /* Maybe other objects have to be freed */

    return OMPI_SUCCESS;
}

256
ompi/mca/btl/portals4/btl_portals4.h Обычный файл
Просмотреть файл

@ -0,0 +1,256 @@
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010-2012 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef BTL_PORTALS_H_HAS_BEEN_INCLUDED
#define BTL_PORTALS_H_HAS_BEEN_INCLUDED
#include <portals4.h>
#include <btl_portals4_frag.h>
/* 2^48: size of the address range that finalize() divides by
   fixed_md_distance to obtain the number of fixed MDs. */
#define MEMORY_MAX_SIZE ((long int)1<<48)
/* Constant with the upper 16 bits of a 64-bit value set. */
#define EXTENDED_ADDR (0xffff000000000000)
#include "opal/class/opal_free_list.h"
#include "opal/class/opal_list.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
BEGIN_C_DECLS
/*
 * Portals BTL component.
 *
 * Holds the base BTL component plus the component-wide tunables; the
 * int members below are registered as MCA variables in
 * component_register().
 */
struct mca_btl_portals4_component_t {
/* base BTL component */
mca_btl_base_component_2_0_0_t super;
/* output channel for debugging */
int portals_verbosity;
/* initial size of free lists */
int portals_free_list_init_num;
/* max size of free lists */
int portals_free_list_max_num;
/* number of elements to grow free lists */
int portals_free_list_inc_num;
/* number of eager fragments */
int portals_free_list_eager_max_num;
/* do I need a portals ACK? */
int portals_need_ack;
};
typedef struct mca_btl_portals4_component_t mca_btl_portals4_component_t;
/*
 * Portals BTL module: the base BTL module interface followed by the
 * Portals handles, free lists and counters of this BTL.  A single
 * global instance, mca_btl_portals4_module, is declared below.
 */
struct mca_btl_portals4_module_t {
/* base BTL module interface */
mca_btl_base_module_t super;
/* number of processes we're actively connected to. Needed to
know when to do activation / shutdown */
int32_t portals_num_procs;
/* Portals process id (presumably that of the local process -- confirm
   where it is assigned) */
ptl_process_t ptl_process_id;
/* fragment free lists */
ompi_free_list_t portals_frag_eager;
ompi_free_list_t portals_frag_max;
ompi_free_list_t portals_frag_user;
/* incoming send message receive memory descriptors */
int portals_recv_mds_num;
int portals_recv_mds_size;
opal_list_t portals_recv_blocks;
/** Length of the receive event queues */
int recv_queue_size;
/** Event queue handle */
ptl_handle_eq_t recv_eq_h;
/* number of outstanding sends and local rdma operations, and the limit
   enforced on it in prepare_src / prepare_dst */
volatile int32_t portals_outstanding_ops;
int32_t portals_max_outstanding_ops;
/* key to use for next rdma operation */
volatile int64_t portals_rdma_key;
/* our portals network interface */
ptl_handle_ni_t portals_ni_h;
/** portals index */
ptl_pt_index_t recv_idx;
/** MD handle for sending ACKS */
ptl_handle_md_t zero_md_h;
/** Fixed MD handles covering all of memory for sending normal messages */
ptl_handle_md_t *fixed_md_h;
/** Number of bytes covered by each fixed MD */
uint64_t fixed_md_distance;
/** long message receive overflow ME. Persistent ME, first in
overflow list on the recv_idx portal table. */
ptl_handle_me_t long_overflow_me_h;
};
typedef struct mca_btl_portals4_module_t mca_btl_portals4_module_t;
extern mca_btl_portals4_module_t mca_btl_portals4_module;
/* NOTE(review): presumably the portal table index requested for the
   receive side -- confirm against component_init. */
#define REQ_RECV_TABLE_ID 12

/* match/ignore bit manipulation
 *
 * 0123 4567 01234567 01234567 01234567 01234567 01234567 01234567 01234567
 *     |             |                 |
 * ^   | context id  |      source     |            message tag
 * |   |             |                 |
 * +---- protocol
 */
#define BTL_PORTALS4_PROTOCOL_MASK 0xF000000000000000ULL
#define BTL_PORTALS4_CONTEXT_MASK  0x0FFF000000000000ULL
#define BTL_PORTALS4_SOURCE_MASK   0x0000FFFF00000000ULL
#define BTL_PORTALS4_TAG_MASK      0x00000000FFFFFFFFULL

#define BTL_PORTALS4_PROTOCOL_IGNR BTL_PORTALS4_PROTOCOL_MASK
#define BTL_PORTALS4_CONTEXT_IGNR  BTL_PORTALS4_CONTEXT_MASK
#define BTL_PORTALS4_SOURCE_IGNR   BTL_PORTALS4_SOURCE_MASK
#define BTL_PORTALS4_TAG_IGNR      0x000000007FFFFFFFULL

#define BTL_PORTALS4_SHORT_MSG     0x1000000000000000ULL
#define BTL_PORTALS4_LONG_MSG      0x2000000000000000ULL

/* send posting
 *
 * Builds match_bits as: contextid << 48 | source << 32 | (tag masked to
 * 32 bits) | type.  Every argument is parenthesized so that compound
 * expressions (e.g. "a | b") are combined as a whole.  The plain brace
 * form of the original is kept so existing call sites keep compiling
 * unchanged; do not use these macros as the unbraced body of an
 * if/else.
 */
#define BTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, source, tag, type) \
    {                                                                        \
        (match_bits) = (contextid);                                          \
        (match_bits) = ((match_bits) << 16);                                 \
        (match_bits) |= (source);                                            \
        (match_bits) = ((match_bits) << 32);                                 \
        (match_bits) |= (BTL_PORTALS4_TAG_MASK & (tag)) | (type);            \
    }

/* Builds hdr_data as: sync flag (1 bit) | opcount (15 bits) |
 * length (48 bits), from the most significant bit down. */
#define BTL_PORTALS4_SET_HDR_DATA(hdr_data, opcount, length, sync) \
    {                                                              \
        (hdr_data) = (sync) ? 1 : 0;                               \
        (hdr_data) = ((hdr_data) << 15);                           \
        (hdr_data) |= ((opcount) & 0x7FFFULL);                     \
        (hdr_data) = ((hdr_data) << 48);                           \
        (hdr_data) |= ((length) & 0xFFFFFFFFFFFFULL);              \
    }
/* Component progress function; also called from prepare_src,
   prepare_dst and finalize while waiting for outstanding operations. */
int mca_btl_portals4_component_progress(void);
/* BTL interface functions */
/* Drain outstanding operations and release the module's Portals resources. */
int mca_btl_portals4_finalize(struct mca_btl_base_module_t* btl_base);
/* Create endpoints for the given processes and mark them reachable. */
int mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers,
opal_bitmap_t* reachable);
/* Free the endpoints of the given processes. */
int mca_btl_portals4_del_procs(struct mca_btl_base_module_t* btl_base,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers);
/* Allocate an eager or max-send descriptor, depending on size. */
mca_btl_base_descriptor_t*
mca_btl_portals4_alloc(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* endpoint,
uint8_t order,
size_t size,
uint32_t flags);
/* Return a descriptor to the free list matching its fragment type. */
int mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base,
mca_btl_base_descriptor_t* des);
/* Pack data into a fragment, or expose the user's buffer through a
   match entry, and return the source descriptor. */
mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);
/* Return a descriptor for the local destination buffer of an RDMA
   operation. */
mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_dst(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);
/* Send entry point installed in the module's function table. */
int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor,
mca_btl_base_tag_t tag);
/* Declared here, but the module's function table holds NULL in the
   sendi slot. */
int mca_btl_portals4_sendi(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* endpoint,
struct opal_convertor_t* convertor,
void* header,
size_t header_size,
size_t payload_size,
uint8_t order,
uint32_t flags,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t** des);
/* Declared here, but the module's function table holds NULL in the
   put slot. */
int mca_btl_portals4_put(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* decriptor);
/* Get entry point installed in the module's function table. */
int mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* decriptor);
/* NOTE(review): presumably maps a Portals error code to an OMPI error
   code -- the definition is not in this file; confirm. */
int mca_btl_portals4_get_error(int ptl_error);
/*
 * global structures
 */
OMPI_MODULE_DECLSPEC extern mca_btl_portals4_component_t mca_btl_portals4_component;
extern mca_btl_portals4_module_t mca_btl_portals4_module;
/**
 * An abstraction that represents a connection to an endpoint process.
 * An instance of mca_btl_base_endpoint_t is associated w/ each process
 * and BTL pair at startup. However, connections to the endpoint
 * are established dynamically on an as-needed basis.
 *
 * For this BTL the endpoint only records the peer's Portals process
 * id; add_procs() allocates it and del_procs() frees it.
 */
struct mca_btl_base_endpoint_t {
/* Portals process id of the peer, as received through the modex */
ptl_process_t ptl_proc;
};
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
END_C_DECLS
#endif /* BTL_PORTALS_H_HAS_BEEN_INCLUDED */

Просмотреть файл

@ -0,0 +1,784 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010-2012 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/mca/event/event.h"
#include "opal/util/output.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "portals4.h"
#include "btl_portals4.h"
#include "btl_portals4_frag.h"
#include "btl_portals4_recv.h"
/* Component entry points; they are installed in the
   mca_btl_portals4_component descriptor defined in this file. */
static int mca_btl_portals4_component_register(void);
static int mca_btl_portals4_component_open(void);
static int mca_btl_portals4_component_close(void);
static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
bool enable_progress_threads,
bool enable_mpi_threads);
int mca_btl_portals4_component_progress(void);
OMPI_MODULE_DECLSPEC extern mca_btl_portals4_component_t mca_btl_portals4_component;
/* Number of bits used for the length of an MD; defaults to 48 in
   component_register() and is capped at 48 in component_open(), where
   it determines fixed_md_distance.
   NOTE(review): declared unsigned but registered with
   MCA_BASE_VAR_TYPE_INT -- confirm this is intended. */
static unsigned int ompi_btl_portals4_md_size_bit_width;
/*
 * The Portals 4 BTL component descriptor: version information and the
 * open / close / register hooks, followed by the BTL-level init and
 * progress functions.  The tunable members of
 * mca_btl_portals4_component_t are not listed here; they receive their
 * defaults in component_register().
 */
mca_btl_portals4_component_t mca_btl_portals4_component = {
{
/* First, the mca_base_module_t struct containing meta
information about the module itself */
{
MCA_BTL_BASE_VERSION_2_0_0,
"portals4", /* MCA module name */
OMPI_MAJOR_VERSION, /* MCA module major version */
OMPI_MINOR_VERSION, /* MCA module minor version */
OMPI_RELEASE_VERSION, /* MCA module release version */
mca_btl_portals4_component_open, /* module open */
mca_btl_portals4_component_close, /* module close */
NULL, /* component query */
mca_btl_portals4_component_register, /* component register */
},
{
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
mca_btl_portals4_component_init,
mca_btl_portals4_component_progress,
}
};
static int
mca_btl_portals4_component_register(void)
{
mca_btl_portals4_component.portals_free_list_init_num = 16;
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
"free_list_init_num",
"Initial number of elements to initialize in free lists",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&(mca_btl_portals4_component.portals_free_list_init_num));
mca_btl_portals4_component.portals_free_list_max_num = 1024;
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
"free_list_max_num",
"Max number of elements to initialize in free lists",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&(mca_btl_portals4_component.portals_free_list_max_num));
mca_btl_portals4_component.portals_free_list_inc_num = 16;
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
"free_list_inc_num",
"Increment count for free lists",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&(mca_btl_portals4_component.portals_free_list_inc_num));
mca_btl_portals4_component.portals_free_list_eager_max_num = 32;
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
"eager_frag_limit",
"Maximum number of pre-pinned eager fragments",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&(mca_btl_portals4_component.portals_free_list_eager_max_num));
mca_btl_portals4_component.portals_need_ack = 1; /* default to true.. */
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
"needs_ack",
"Require a portals level ACK",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&(mca_btl_portals4_component.portals_need_ack));
mca_btl_portals4_module.recv_queue_size = 1024 * 1024;
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
"eq_recv_size",
"Size of the receive event queue",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&(mca_btl_portals4_module.recv_queue_size));
ompi_btl_portals4_md_size_bit_width = 48;
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
"md_size_bit_width",
"Number of bits used to specify the length of an MD to the portals4 library",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_btl_portals4_md_size_bit_width);
mca_btl_portals4_module.portals_max_outstanding_ops = 8 * 1024;
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
"max_pending_ops",
"Maximum number of pending send/rdma frags",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&(mca_btl_portals4_module.portals_max_outstanding_ops));
mca_btl_portals4_module.portals_recv_mds_num = 32;
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
"recv_md_num",
"Number of send frag receive descriptors",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&(mca_btl_portals4_module.portals_recv_mds_num));
mca_btl_portals4_module.portals_recv_mds_size = 64 * 1024;
(void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
"recv_md_size",
"Size of send frag receive descriptors",
MCA_BASE_VAR_TYPE_INT,
NULL,
0,
0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&(mca_btl_portals4_module.portals_recv_mds_size));
return OMPI_SUCCESS;
}
/**
 * Component open: fill in the default module parameters, build the three
 * fragment free lists and reset every Portals handle kept in the module
 * to its "invalid" value.  No Portals call is made here.
 *
 * The table of fixed memory descriptors (fixed_md_h) is only allocated
 * when at most 32 descriptors are needed to cover MEMORY_MAX_SIZE;
 * otherwise fixed_md_distance is set to 0, which later code reads as
 * "no fixed MD".  The same fallback is used if the table cannot be
 * allocated.
 *
 * @return OMPI_SUCCESS in all cases.
 */
static int
mca_btl_portals4_component_open(void)
{
    unsigned int i;
    uint64_t fixed_md_nb;

    mca_btl_portals4_component.portals_verbosity = opal_output_get_verbosity(ompi_btl_base_framework.framework_output);
    OPAL_OUTPUT_VERBOSE((1, ompi_btl_base_framework.framework_output, "mca_btl_portals4_component_open\n"));

    /*
     * fill default module state
     */
    mca_btl_portals4_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW + 100;
    mca_btl_portals4_module.super.btl_eager_limit = 32 * 1024;
    mca_btl_portals4_module.super.btl_rndv_eager_limit = 32 * 1024;
    mca_btl_portals4_module.super.btl_max_send_size = 64 * 1024;
    mca_btl_portals4_module.super.btl_rdma_pipeline_send_length = 64 * 1024;
    mca_btl_portals4_module.super.btl_rdma_pipeline_frag_size = INT_MAX;
    mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0;
    mca_btl_portals4_module.super.btl_flags =
        MCA_BTL_FLAGS_RDMA |
        MCA_BTL_FLAGS_RDMA_MATCHED;
    mca_btl_portals4_module.super.btl_seg_size = sizeof (mca_btl_portals4_segment_t);
    mca_btl_portals4_module.super.btl_bandwidth = 1000;
    mca_btl_portals4_module.super.btl_latency = 0;

    mca_btl_base_param_register(&mca_btl_portals4_component.super.btl_version, &mca_btl_portals4_module.super);

    mca_btl_portals4_module.portals_num_procs = 0;
    mca_btl_portals4_module.recv_eq_h = PTL_EQ_NONE;

    /* Clamp the user-supplied bit width to [0, 48]: a negative shift count
       would be undefined behaviour. */
    if (48 < ompi_btl_portals4_md_size_bit_width) ompi_btl_portals4_md_size_bit_width = 48;
    if (0 > ompi_btl_portals4_md_size_bit_width) ompi_btl_portals4_md_size_bit_width = 0;
    mca_btl_portals4_module.fixed_md_distance = (unsigned long int) 1<<ompi_btl_portals4_md_size_bit_width;
    opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                        "fixed_md_distance=%16.16lx\n", mca_btl_portals4_module.fixed_md_distance);

    OBJ_CONSTRUCT(&(mca_btl_portals4_module.portals_frag_eager), ompi_free_list_t);
    OBJ_CONSTRUCT(&(mca_btl_portals4_module.portals_frag_max), ompi_free_list_t);
    OBJ_CONSTRUCT(&(mca_btl_portals4_module.portals_frag_user), ompi_free_list_t);

    /* eager frags: descriptor followed by btl_eager_limit bytes of payload */
    ompi_free_list_init_new(&(mca_btl_portals4_module.portals_frag_eager),
                            sizeof(mca_btl_portals4_frag_eager_t) +
                            mca_btl_portals4_module.super.btl_eager_limit,
                            opal_cache_line_size,
                            OBJ_CLASS(mca_btl_portals4_frag_eager_t),
                            0,opal_cache_line_size,
                            mca_btl_portals4_component.portals_free_list_init_num,
                            mca_btl_portals4_component.portals_free_list_eager_max_num,
                            mca_btl_portals4_component.portals_free_list_inc_num,
                            NULL);

    /* send frags: descriptor followed by btl_max_send_size bytes of payload */
    ompi_free_list_init_new(&(mca_btl_portals4_module.portals_frag_max),
                            sizeof(mca_btl_portals4_frag_max_t) +
                            mca_btl_portals4_module.super.btl_max_send_size,
                            opal_cache_line_size,
                            OBJ_CLASS(mca_btl_portals4_frag_max_t),
                            0,opal_cache_line_size,
                            mca_btl_portals4_component.portals_free_list_init_num,
                            mca_btl_portals4_component.portals_free_list_max_num,
                            mca_btl_portals4_component.portals_free_list_inc_num,
                            NULL);

    /* user frags: descriptor only, no trailing payload */
    ompi_free_list_init_new(&(mca_btl_portals4_module.portals_frag_user),
                            sizeof(mca_btl_portals4_frag_user_t),
                            opal_cache_line_size,
                            OBJ_CLASS(mca_btl_portals4_frag_user_t),
                            0,opal_cache_line_size,
                            mca_btl_portals4_component.portals_free_list_init_num,
                            mca_btl_portals4_component.portals_free_list_max_num,
                            mca_btl_portals4_component.portals_free_list_inc_num,
                            NULL);

    /* receive block list */
    OBJ_CONSTRUCT(&(mca_btl_portals4_module.portals_recv_blocks), opal_list_t);

    mca_btl_portals4_module.portals_ni_h = PTL_INVALID_HANDLE;
    mca_btl_portals4_module.zero_md_h = PTL_INVALID_HANDLE;

    /* number of fixed MDs needed to span MEMORY_MAX_SIZE */
    if (MEMORY_MAX_SIZE > mca_btl_portals4_module.fixed_md_distance)
        fixed_md_nb = MEMORY_MAX_SIZE/mca_btl_portals4_module.fixed_md_distance;
    else fixed_md_nb = 1;

    if (fixed_md_nb > 32) mca_btl_portals4_module.fixed_md_distance = 0;
    else {
        /* Allocate the md_h table */
        mca_btl_portals4_module.fixed_md_h = malloc(fixed_md_nb * sizeof(ptl_handle_md_t));
        if (NULL == mca_btl_portals4_module.fixed_md_h) {
            /* Could not get the table: run without fixed MDs rather than
               dereferencing a NULL pointer below. */
            opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                                "%s:%d: malloc of fixed_md_h failed, fixed MDs disabled\n",
                                __FILE__, __LINE__);
            mca_btl_portals4_module.fixed_md_distance = 0;
        }
        else {
            for (i=0; i<fixed_md_nb; i++) mca_btl_portals4_module.fixed_md_h[i] = PTL_INVALID_HANDLE;
        }
    }

    mca_btl_portals4_module.long_overflow_me_h = PTL_INVALID_HANDLE;
    mca_btl_portals4_module.portals_outstanding_ops = 0;
    mca_btl_portals4_module.recv_idx = (ptl_pt_index_t) ~0UL;

    return OMPI_SUCCESS;
}
/**
 * Component close: closes the framework output stream and destructs the
 * four containers built by mca_btl_portals4_component_open() (the three
 * fragment free lists and the receive block list).
 *
 * @return OMPI_SUCCESS in all cases.
 */
static int
mca_btl_portals4_component_close(void)
{
    opal_output_verbose(50, ompi_btl_base_framework.framework_output, "mca_btl_portals4_component_close\n");

    /* close the debugging stream and mark the handle as unusable */
    opal_output_close(ompi_btl_base_framework.framework_output);
    ompi_btl_base_framework.framework_output = -1;

    /* fragment free lists */
    OBJ_DESTRUCT(&(mca_btl_portals4_module.portals_frag_eager));
    OBJ_DESTRUCT(&(mca_btl_portals4_module.portals_frag_max));
    OBJ_DESTRUCT(&(mca_btl_portals4_module.portals_frag_user));

    /* list of receive blocks */
    OBJ_DESTRUCT(&(mca_btl_portals4_module.portals_recv_blocks));

    return OMPI_SUCCESS;
}
/**
 * Component init: bring up the Portals 4 interface and return the single
 * BTL module of this component.
 *
 * Steps, in order: PtlInit, PtlNIInit (physical, matching), publish our
 * process id through the modex, allocate the receive event queue and
 * portal table entry, bind the zero-length MD, bind the fixed MDs (only
 * when fixed_md_distance is non-zero) and append the long-message
 * overflow ME.
 *
 * @param num_btls                 set to 1 on success, untouched on failure
 * @param enable_progress_threads  the BTL refuses to start if true
 * @param enable_mpi_threads       the BTL refuses to start if true
 *
 * @return a malloc'ed one-element array holding the module, or NULL when
 *         threads are enabled, memory is exhausted or a Portals call
 *         fails.
 */
static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
                                                               bool enable_progress_threads,
                                                               bool enable_mpi_threads)
{
    int ret;
    ptl_process_t ptl_process_id;
    ptl_md_t md;
    ptl_me_t me;
    mca_btl_base_module_t **btls = NULL;

    opal_output_verbose(50, ompi_btl_base_framework.framework_output, "mca_btl_portals4_component_init\n");

    /* Check the thread configuration before allocating anything, so the
       early return below has nothing to release. */
    if (enable_progress_threads || enable_mpi_threads) {
        opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                            "btl portals4 disabled because threads enabled");
        return NULL;
    }

    btls = malloc(sizeof(mca_btl_base_module_t*));
    if (NULL == btls) {
        opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                            "%s:%d: malloc failed\n",
                            __FILE__, __LINE__);
        return NULL;
    }
    btls[0] = (mca_btl_base_module_t*) &mca_btl_portals4_module;

    /* Initialize Portals and create a physical, matching interface */
    ret = PtlInit();
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                            "%s:%d: PtlInit failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlInit OK\n"));

    /*
     * Initialize a network device
     */
    ret = PtlNIInit(PTL_IFACE_DEFAULT,
                    PTL_NI_PHYSICAL | PTL_NI_MATCHING,
                    PTL_PID_ANY, /* let library assign our pid */
                    NULL, /* no desired limits */
                    NULL, /* actual limits */
                    &mca_btl_portals4_module.portals_ni_h /* our interface handle */
                    );
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                            "%s:%d: PtlNIInit failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlNIInit OK\n"));

    /* Publish our NID/PID in the modex */
    ret = PtlGetId(mca_btl_portals4_module.portals_ni_h ,&ptl_process_id);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                            "%s:%d: PtlGetId failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    mca_btl_portals4_module.ptl_process_id = ptl_process_id;
    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                         "PtlGetId nid=%x pid=%x\n", ptl_process_id.phys.nid, ptl_process_id.phys.pid));

    ret = ompi_modex_send(&mca_btl_portals4_component.super.btl_version,
                          &ptl_process_id, sizeof(ptl_process_t));
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                            "%s:%d: ompi_modex_send failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* create event queue */
    ret = PtlEQAlloc(mca_btl_portals4_module.portals_ni_h,
                     mca_btl_portals4_module.recv_queue_size,
                     &mca_btl_portals4_module.recv_eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                            "%s:%d: PtlEQAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlEQAlloc (recv_queue) OK\n"));

    /* Create the recv_idx portal table entry */
    ret = PtlPTAlloc(mca_btl_portals4_module.portals_ni_h,
                     PTL_PT_ONLY_USE_ONCE |
                     PTL_PT_ONLY_TRUNCATE,
                     mca_btl_portals4_module.recv_eq_h,
                     REQ_RECV_TABLE_ID,
                     &mca_btl_portals4_module.recv_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                         "PtlPTAlloc (recv_idx) OK recv_idx=%d\n", mca_btl_portals4_module.recv_idx));

    /* bind zero-length md for sending acks */
    md.start = NULL;
    md.length = 0;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(mca_btl_portals4_module.portals_ni_h,
                    &md,
                    &mca_btl_portals4_module.zero_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlMDBind (zero-length md) OK\n"));

    /* bind fixed md across all of memory */
    if (mca_btl_portals4_module.fixed_md_distance) {
        unsigned int i;
        uint64_t fixed_md_nb, fixed_md_distance;

        fixed_md_distance = mca_btl_portals4_module.fixed_md_distance;
        if (MEMORY_MAX_SIZE > fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/fixed_md_distance;
        else fixed_md_nb = 1;

        opal_output_verbose(1, ompi_btl_base_framework.framework_output, "Fixed MDs :\n");

        /* Bind the fixed MDs */
        for (i=0; i<fixed_md_nb; i++) {
            uint64_t offset = i * fixed_md_distance;

            /* if the most significant bit of the address space is set, set the extended address bits */
            if (offset & (MEMORY_MAX_SIZE >> 1)) offset += EXTENDED_ADDR;

            opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                                " %2d: [ %16lx - %16lx ]\n", i, offset, offset + fixed_md_distance - 2);

            md.start = (char *) offset;
            md.length = fixed_md_distance - 1;
            md.options = 0;
            md.eq_handle = mca_btl_portals4_module.recv_eq_h;
            md.ct_handle = PTL_CT_NONE;

            ret = PtlMDBind(mca_btl_portals4_module.portals_ni_h,
                            &md,
                            &mca_btl_portals4_module.fixed_md_h[i]);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                                    "%s:%d: PtlMDBind failed: %d\n",
                                    __FILE__, __LINE__, ret);
                goto error;
            }
        }
        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlMDBind (all memory) OK\n"));
    }
    else opal_output_verbose(1, ompi_btl_base_framework.framework_output, "No fixed MD\n");

    /* Handle long overflows */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = PTL_UID_ANY;
    me.options = PTL_ME_OP_PUT |
        PTL_ME_EVENT_LINK_DISABLE |
        PTL_ME_EVENT_COMM_DISABLE |
        PTL_ME_EVENT_UNLINK_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = BTL_PORTALS4_LONG_MSG;
    me.ignore_bits = BTL_PORTALS4_CONTEXT_MASK |
        BTL_PORTALS4_SOURCE_MASK |
        BTL_PORTALS4_TAG_MASK;

    ret = PtlMEAppend(mca_btl_portals4_module.portals_ni_h,
                      mca_btl_portals4_module.recv_idx,
                      &me,
                      PTL_OVERFLOW_LIST,
                      NULL,
                      &mca_btl_portals4_module.long_overflow_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlMEAppend (overflow list) OK\n"));

    *num_btls = 1;

    opal_output_verbose(1, ompi_btl_base_framework.framework_output, "btl portals4 module has been initialized");
    return btls;

 error:
    opal_output_verbose(1, ompi_btl_base_framework.framework_output, "Error in mca_btl_portals4_component_init\n");

    free(btls);

    if (mca_btl_portals4_module.fixed_md_distance) {
        int i;
        int fixed_md_nb;

        if (MEMORY_MAX_SIZE > mca_btl_portals4_module.fixed_md_distance) fixed_md_nb = MEMORY_MAX_SIZE/mca_btl_portals4_module.fixed_md_distance;
        else fixed_md_nb = 1;

        for (i=0; i<fixed_md_nb; i++) {
            if (!PtlHandleIsEqual(mca_btl_portals4_module.fixed_md_h[i], PTL_INVALID_HANDLE)) {
                PtlMDRelease(mca_btl_portals4_module.fixed_md_h[i]);
                /* forget the handle so that it cannot be released twice */
                mca_btl_portals4_module.fixed_md_h[i] = PTL_INVALID_HANDLE;
            }
        }
    }

    /* TODO: the other Portals resources acquired above (zero-length MD,
       portal table entry, event queue, network interface) are not
       released on this path. */
    return NULL;
}
/**
 * Translate a Portals 4 return code into the corresponding OMPI error
 * code.
 *
 * @param ptl_error  value returned by a Portals call
 *
 * @return OMPI_SUCCESS for PTL_OK, the matching OMPI_ERR_* code for the
 *         Portals codes listed below, OMPI_ERROR for PTL_FAIL and for any
 *         code that is not listed.
 */
int
mca_btl_portals4_get_error(int ptl_error)
{
    switch (ptl_error) {
    case PTL_OK:
        return OMPI_SUCCESS;

    case PTL_ARG_INVALID:
    case PTL_PID_IN_USE:
        return OMPI_ERR_BAD_PARAM;

    case PTL_CT_NONE_REACHED:
        return OMPI_ERR_TIMEOUT;

    case PTL_EQ_DROPPED:
    case PTL_LIST_TOO_LONG:
    case PTL_NO_SPACE:
    case PTL_PT_FULL:
        return OMPI_ERR_OUT_OF_RESOURCE;

    case PTL_EQ_EMPTY:
        return OMPI_ERR_TEMP_OUT_OF_RESOURCE;

    case PTL_IN_USE:
    case PTL_INTERRUPTED:
    case PTL_PT_IN_USE:
        return OMPI_ERR_RESOURCE_BUSY;

    case PTL_NO_INIT:
    case PTL_PT_EQ_NEEDED:
        return OMPI_ERR_FATAL;

    case PTL_FAIL:
    default:
        return OMPI_ERROR;
    }
}
/**
 * Progress function: poll the receive event queue with PtlEQGet and
 * dispatch the event that was found.
 *
 * Every event type ends the call after it has been handled, except a
 * PTL_EVENT_REPLY carrying PTL_NI_PERM_VIOLATION: in that case the PtlGet
 * is re-issued and the queue is polled again.
 *
 * @return the number of events taken from the queue, 0 when no peer has
 *         been added yet or the queue is empty.
 *         NOTE(review): OMPI_ERROR is returned when a re-issued PtlGet
 *         fails, which mixes an error code with an event count -- confirm
 *         that callers cope with a negative value.
 */
int
mca_btl_portals4_component_progress(void)
{
    int num_progressed = 0;
    int ret, btl_ownership, has_payload;
    mca_btl_portals4_frag_t *frag = NULL;
    mca_btl_portals4_frag_t recv_frag;
    mca_btl_base_tag_t tag;
    static ptl_event_t ev;
    mca_btl_active_message_callback_t* reg;
    mca_btl_base_segment_t seg[2];

    if (0 == mca_btl_portals4_module.portals_num_procs) return 0;

    while (true) {
        ret = PtlEQGet(mca_btl_portals4_module.recv_eq_h, &ev);

        if (PTL_OK == ret) {
            OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlEQGet Event received: %d (%d)\n", ev.type, ev.ni_fail_type));
            num_progressed++;

            switch (ev.type) {

            case PTL_EVENT_SEND: /* generated on source (origin) when put stops sending */
                frag = ev.user_ptr;
                /* Read everything needed from the fragment now: the
                   callback or mca_btl_portals4_free() below may hand it
                   back to its free list. */
                btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
                has_payload = (0 != frag->size);

                if (!mca_btl_portals4_component.portals_need_ack) {
                    /* my part's done, in portals we trust! */
                    if( MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ){
                        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                                             "PTL_EVENT_SEND: Direct call to des_cbfunc: %lx\n", (uint64_t)frag->base.des_cbfunc));
                        frag->base.des_cbfunc(&mca_btl_portals4_module.super,
                                              frag->endpoint,
                                              &frag->base,
                                              OMPI_SUCCESS);
                    }
                    if (btl_ownership) {
                        mca_btl_portals4_free(&mca_btl_portals4_module.super, &frag->base);
                    }
                    if (has_payload) {
                        OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
                        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                                             "PTL_EVENT_SEND: Decrementing portals_outstanding_ops=%d (1)\n",
                                             mca_btl_portals4_module.portals_outstanding_ops));
                    }
                }
                goto done;

            case PTL_EVENT_ACK: /* Ack that a put has completed on the other side. We just call the callback function */
                frag = ev.user_ptr;
                OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                                     "PTL_EVENT_ACK received rlength=%ld mlength=%ld des_flags=%d\n", ev.rlength, ev.mlength, frag->base.des_flags));
                /* same precaution as for PTL_EVENT_SEND */
                btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
                has_payload = (0 != frag->size);

                /* other side received the message. should have
                   received entire thing */
                /* let the PML know we're done */
                if (MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ) {
                    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                                         "PTL_EVENT_ACK: Call to des_cbfunc %lx\n", (uint64_t)frag->base.des_cbfunc));
                    frag->base.des_cbfunc(&mca_btl_portals4_module.super,
                                          frag->endpoint,
                                          &frag->base,
                                          OMPI_SUCCESS);
                }
                if (btl_ownership) {
                    mca_btl_portals4_free(&mca_btl_portals4_module.super, &frag->base);
                }
                if (has_payload) {
                    OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
                    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                                         "PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", mca_btl_portals4_module.portals_outstanding_ops));
                }
                goto done;

            case PTL_EVENT_PUT: /* Generated on destination (target) when a put into memory ends */
                /* ev.user_ptr is whatever was given to PtlMEAppend for the
                   receive entry; mca_btl_portals4_activate_block() passes
                   the receive block there, not a fragment.  Describe the
                   incoming data with a descriptor on the stack instead of
                   writing descriptor fields through that pointer. */
                tag = (unsigned char) (ev.hdr_data);

                seg[0].seg_addr.pval = ev.start;
                seg[0].seg_len = ev.mlength;

                recv_frag.base.des_flags = 0;
                recv_frag.base.des_cbfunc = NULL;
                recv_frag.base.des_src = NULL;
                recv_frag.base.des_src_cnt = 0;
                recv_frag.base.des_dst = seg;
                recv_frag.base.des_dst_cnt = 1;

                reg = mca_btl_base_active_message_trigger + tag;
                OPAL_OUTPUT_VERBOSE((50, ompi_btl_base_framework.framework_output,
                                     "PTL_EVENT_PUT: tag=%x frag=%p cbfunc: %lx\n", tag, (void*)ev.user_ptr, (uint64_t)reg->cbfunc));
                reg->cbfunc(&mca_btl_portals4_module.super, tag, &(recv_frag.base), reg->cbdata);
                goto done;

            case PTL_EVENT_PUT_OVERFLOW:
                /* nothing to do */
                goto done;

            case PTL_EVENT_LINK:
                /* nothing to do */
                goto done;

            case PTL_EVENT_AUTO_UNLINK:
                /* This activation should be done for PTL_EVENT_AUTO_FREE */
                mca_btl_portals4_activate_block(ev.user_ptr);
                goto done;

            case PTL_EVENT_AUTO_FREE:
                /* nothing to do */
                goto done;

            case PTL_EVENT_GET:
                /* nothing to do */
                goto done;

            case PTL_EVENT_REPLY:
                frag = ev.user_ptr;

                if (PTL_NI_PERM_VIOLATION == ev.ni_fail_type) {
                    opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                                        "Warning : PTL_EVENT_REPLY with PTL_NI_PERM_VIOLATION received, try to re-issue a PtlGet");

                    /* The distant PtlMEAppend is not finished (distant PTL_EVENT_LINK not received) */
                    /* Re-issue the PtlGet (see btl_portals4_rdma.c) */
                    ret = PtlGet(frag->md_h,
                                 0,
                                 frag->length,
                                 frag->peer_proc,
                                 mca_btl_portals4_module.recv_idx,
                                 frag->match_bits, /* match bits */
                                 0,
                                 frag);
                    if (OPAL_UNLIKELY(PTL_OK != ret)) {
                        opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                                            "%s:%d: Re-issued PtlGet failed: %d",
                                            __FILE__, __LINE__, ret);
                        PtlMDRelease(frag->md_h);
                        frag->md_h = PTL_INVALID_HANDLE;
                        return OMPI_ERROR;
                    }

                    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                                         "Re-issued PtlGet length=%ld recv_idx=%d pid=%x match_bits=%lx\n",
                                         frag->length, mca_btl_portals4_module.recv_idx, frag->peer_proc.phys.pid, frag->match_bits));
                    /* leave the switch only: the queue is polled again */
                }
                else {
                    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                                         "PTL_EVENT_REPLY: Call to des_cbfunc: %lx\n", (uint64_t)frag->base.des_cbfunc));
                    frag->base.des_cbfunc(&mca_btl_portals4_module.super,
                                          frag->endpoint,
                                          &frag->base,
                                          OMPI_SUCCESS);
                    PtlMDRelease(frag->md_h);
                    frag->md_h = PTL_INVALID_HANDLE;

                    OMPI_BTL_PORTALS4_FRAG_RETURN_USER(&mca_btl_portals4_module.super, frag);
                    OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
                    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                                         "PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", mca_btl_portals4_module.portals_outstanding_ops));
                    goto done;
                }
                break;

            default:
                /* nothing to do */
                goto done;
            }
        } else if (PTL_EQ_EMPTY == ret) {
            /* there's nothing in the queue. This is actually the
               common case, so the easiest way to make the compiler
               emit something that doesn't completely blow here is to
               just go back to a good old goto */
            goto done;
        } else if (PTL_EQ_DROPPED == ret) {
            opal_output(ompi_btl_base_framework.framework_output,
                        "Flow control situation without recovery (EQ_DROPPED)");
            break;
        } else {
            /* the failing call is PtlEQGet, so name it in the message */
            opal_output(ompi_btl_base_framework.framework_output,
                        "Error returned from PtlEQGet: %d", ret);
            break;
        }
    }

 done:
    return num_progressed;
}

Просмотреть файл

@ -0,0 +1,40 @@
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_BTL_PORTALS4_ENDPOINT_H
#define OMPI_BTL_PORTALS4_ENDPOINT_H
#include "btl_portals4.h"
BEGIN_C_DECLS
/**
 * Per-peer endpoint of the Portals 4 BTL.
 *
 * The only state kept for a peer is its Portals process identifier,
 * which is used as the target of the Portals operations issued towards
 * that peer (for instance the PtlGet in mca_btl_portals4_get()).
 */
typedef struct mca_btl_base_endpoint_t {
    /** Portals process identifier of the peer */
    ptl_process_t ptl_proc;
} mca_btl_base_endpoint_t;
END_C_DECLS
#endif /* OMPI_BTL_PORTALS4_ENDPOINT_H */

100
ompi/mca/btl/portals4/btl_portals4_frag.c Обычный файл
Просмотреть файл

@ -0,0 +1,100 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "btl_portals4.h"
#include "btl_portals4_frag.h"
/**
 * Initialisation shared by the eager and max send fragment constructors.
 *
 * frag->size must already be set by the caller: it becomes the length of
 * the single source segment, whose data pointer is the memory that
 * immediately follows the fragment structure.
 *
 * @param frag  fragment being constructed
 */
static void
mca_btl_portals4_frag_common_send_constructor(mca_btl_portals4_frag_t* frag)
{
    frag->base.des_flags = 0;

    /* no destination segment for a send fragment */
    frag->base.des_dst = 0;
    frag->base.des_dst_cnt = 0;

    /* segments[] holds exactly one element, so advertise one source
       segment; a count of 2 would send readers past the array */
    frag->base.des_src = &frag->segments[0].base;
    frag->base.des_src_cnt = 1;

    frag->segments[0].base.seg_addr.pval = frag + 1;
    frag->segments[0].base.seg_len = frag->size;
    frag->segments[0].key = 0;

    /* start with both Portals handles in a known state */
    frag->me_h = PTL_INVALID_HANDLE;
    frag->md_h = PTL_INVALID_HANDLE;
}
/**
 * Constructor of eager fragments: the payload area is btl_eager_limit
 * bytes long.
 *
 * @param frag  fragment being constructed
 */
static void
mca_btl_portals4_frag_eager_constructor(mca_btl_portals4_frag_t* frag)
{
    frag->type = BTL_PORTALS4_FRAG_TYPE_EAGER;

    /* the size has to be known before the common part runs, which
       copies it into the segment length */
    frag->size = mca_btl_portals4_module.super.btl_eager_limit;
    mca_btl_portals4_frag_common_send_constructor(frag);
}
/**
 * Destructor of eager fragments: forgets the match entry handle if one
 * is still recorded.
 *
 * The test used to be inverted (it only acted when the handle was
 * already invalid), which made the function a no-op.
 *
 * TODO(review): the entry itself is not unlinked here -- the unlink call
 * was left commented out in the original code; confirm who releases it.
 *
 * @param frag  fragment being destroyed
 */
static void
mca_btl_portals4_frag_eager_destructor(mca_btl_portals4_frag_t* frag)
{
    if (PTL_INVALID_HANDLE != frag->me_h) {
        frag->me_h = PTL_INVALID_HANDLE;
    }
}
/**
 * Constructor of max-size send fragments: the payload area is
 * btl_max_send_size bytes long.
 *
 * @param frag  fragment being constructed
 */
static void
mca_btl_portals4_frag_max_constructor(mca_btl_portals4_frag_t* frag)
{
    frag->type = BTL_PORTALS4_FRAG_TYPE_MAX;

    /* the size has to be known before the common part runs, which
       copies it into the segment length */
    frag->size = mca_btl_portals4_module.super.btl_max_send_size;
    mca_btl_portals4_frag_common_send_constructor(frag);
}
/**
 * Constructor of user fragments: no payload area is attached, so the
 * descriptor starts without any source or destination segment and with
 * a size of 0.
 *
 * @param frag  fragment being constructed
 */
static void
mca_btl_portals4_frag_user_constructor(mca_btl_portals4_frag_t* frag)
{
    frag->type = BTL_PORTALS4_FRAG_TYPE_USER;
    frag->size = 0;

    frag->base.des_flags = 0;

    /* no segment on either side */
    frag->base.des_src = 0;
    frag->base.des_src_cnt = 0;
    frag->base.des_dst = 0;
    frag->base.des_dst_cnt = 0;
}
/* Generic fragment class: no constructor, no destructor. */
OBJ_CLASS_INSTANCE(mca_btl_portals4_frag_t,
                   mca_btl_base_descriptor_t,
                   NULL, NULL);

/* Eager fragments: the only class that also has a destructor. */
OBJ_CLASS_INSTANCE(mca_btl_portals4_frag_eager_t,
                   mca_btl_base_descriptor_t,
                   mca_btl_portals4_frag_eager_constructor,
                   mca_btl_portals4_frag_eager_destructor);

/* Max-size send fragments. */
OBJ_CLASS_INSTANCE(mca_btl_portals4_frag_max_t,
                   mca_btl_base_descriptor_t,
                   mca_btl_portals4_frag_max_constructor,
                   NULL);

/* User fragments. */
OBJ_CLASS_INSTANCE(mca_btl_portals4_frag_user_t,
                   mca_btl_base_descriptor_t,
                   mca_btl_portals4_frag_user_constructor,
                   NULL);

134
ompi/mca/btl/portals4/btl_portals4_frag.h Обычный файл
Просмотреть файл

@ -0,0 +1,134 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_BTL_PORTALS4_FRAG_H
#define OMPI_BTL_PORTALS4_FRAG_H
#include "ompi/mca/btl/btl.h"
BEGIN_C_DECLS
/**
 * Segment of the Portals 4 BTL: the generic BTL segment followed by a
 * key.  mca_btl_portals4_get() uses the key of the source segment as the
 * match bits of its PtlGet.
 */
typedef struct mca_btl_portals4_segment_t {
    /** generic segment (address and length) */
    mca_btl_base_segment_t base;
    /** Portals match bits associated with the segment */
    ptl_match_bits_t key;
} mca_btl_portals4_segment_t;
/**
 * Portals 4 fragment, derived from the generic BTL descriptor.
 *
 * The same structure backs the three fragment classes declared below
 * (eager, max and user); the type field tells them apart.
 */
typedef struct mca_btl_portals4_frag_t {
    /** generic descriptor; the code casts between descriptor and
        fragment pointers */
    mca_btl_base_descriptor_t base;
    /** the single segment owned by the fragment */
    mca_btl_portals4_segment_t segments[1];
    /** peer the fragment is exchanged with; handed back to the
        completion callback */
    struct mca_btl_base_endpoint_t *endpoint;
    /** BTL header (its tag is set when an operation is started) */
    mca_btl_base_header_t hdr;
    /** match entry handle, PTL_INVALID_HANDLE when none */
    ptl_handle_me_t me_h;
    /** memory descriptor handle bound for a get operation */
    ptl_handle_md_t md_h;
    /** size of the allocated memory region -- not the amount of data
        we need to send */
    size_t size;
    /** match bits saved so that a PtlGet can be re-issued */
    ptl_match_bits_t match_bits;
    /** length saved so that a PtlGet can be re-issued */
    ptl_size_t length;
    /** target process saved so that a PtlGet can be re-issued */
    ptl_process_t peer_proc;
    /** which free list the fragment belongs to */
    enum { BTL_PORTALS4_FRAG_TYPE_EAGER,
           BTL_PORTALS4_FRAG_TYPE_MAX,
           BTL_PORTALS4_FRAG_TYPE_USER } type;
    /* NOTE(review): purpose not visible in this file -- confirm */
    unsigned char data[16];
} mca_btl_portals4_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_portals4_frag_t);

/* The three concrete fragment classes share the layout above. */
typedef struct mca_btl_portals4_frag_t mca_btl_portals4_frag_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_portals4_frag_eager_t);

typedef struct mca_btl_portals4_frag_t mca_btl_portals4_frag_max_t;
OBJ_CLASS_DECLARATION(mca_btl_portals4_frag_max_t);

typedef struct mca_btl_portals4_frag_t mca_btl_portals4_frag_user_t;
OBJ_CLASS_DECLARATION(mca_btl_portals4_frag_user_t);
/*
* Macros to allocate/return descriptors from module specific
* free list(s).
*/
/* Take a fragment from the eager free list of btl_macro; when that list
 * yields nothing, try the max-send free list instead.  frag may still be
 * NULL afterwards.  Both arguments are parenthesised in the expansion so
 * that expressions can be passed safely. */
#define OMPI_BTL_PORTALS4_FRAG_ALLOC_EAGER(btl_macro, frag)                                          \
{                                                                                                    \
    ompi_free_list_item_t *item;                                                                     \
    OMPI_FREE_LIST_GET_MT(&((mca_btl_portals4_module_t*)(btl_macro))->portals_frag_eager, item);     \
    (frag) = (mca_btl_portals4_frag_t*) item;                                                        \
    if (NULL == (frag)) {                                                                            \
        OMPI_BTL_PORTALS4_FRAG_ALLOC_MAX(btl_macro, frag);                                           \
    }                                                                                                \
}
/* Give an eager fragment back to the eager free list of btl_macro.
 * Both arguments are parenthesised in the expansion so that expressions
 * can be passed safely. */
#define OMPI_BTL_PORTALS4_FRAG_RETURN_EAGER(btl_macro, frag)                                         \
{                                                                                                    \
    assert(BTL_PORTALS4_FRAG_TYPE_EAGER == (frag)->type);                                            \
    OMPI_FREE_LIST_RETURN_MT(&((mca_btl_portals4_module_t*)(btl_macro))->portals_frag_eager,         \
                             (ompi_free_list_item_t*)(frag));                                        \
}
/* Take a fragment from the max-send free list of btl_macro; frag
 * receives whatever the free list returned.  Both arguments are
 * parenthesised in the expansion so that expressions can be passed
 * safely. */
#define OMPI_BTL_PORTALS4_FRAG_ALLOC_MAX(btl_macro, frag)                                            \
{                                                                                                    \
    ompi_free_list_item_t *item_macro;                                                               \
    OMPI_FREE_LIST_GET_MT(&((mca_btl_portals4_module_t*)(btl_macro))->portals_frag_max, item_macro); \
    (frag) = (mca_btl_portals4_frag_t*) item_macro;                                                  \
}
/* Give a max-send fragment back to the max-send free list of btl_macro.
 * Both arguments are parenthesised in the expansion so that expressions
 * can be passed safely. */
#define OMPI_BTL_PORTALS4_FRAG_RETURN_MAX(btl_macro, frag)                                           \
{                                                                                                    \
    assert(BTL_PORTALS4_FRAG_TYPE_MAX == (frag)->type);                                              \
    OMPI_FREE_LIST_RETURN_MT(&((mca_btl_portals4_module_t*)(btl_macro))->portals_frag_max,           \
                             (ompi_free_list_item_t*)(frag));                                        \
}
/* Take a fragment from the user free list of btl_macro and clear its
 * completion callback.  frag is left NULL when the free list yields
 * nothing; the callback is only cleared on a real fragment, so an
 * exhausted list no longer causes a NULL dereference.  Both arguments
 * are parenthesised in the expansion. */
#define OMPI_BTL_PORTALS4_FRAG_ALLOC_USER(btl_macro, frag)                                           \
{                                                                                                    \
    ompi_free_list_item_t *item;                                                                     \
    OMPI_FREE_LIST_GET_MT(&((mca_btl_portals4_module_t*)(btl_macro))->portals_frag_user, item);      \
    (frag) = (mca_btl_portals4_frag_t*) item;                                                        \
    if (NULL != (frag)) {                                                                            \
        (frag)->base.des_cbfunc = NULL;                                                              \
    }                                                                                                \
}
/* Give a user fragment back to the user free list of btl_macro.
 * Both arguments are parenthesised in the expansion so that expressions
 * can be passed safely. */
#define OMPI_BTL_PORTALS4_FRAG_RETURN_USER(btl_macro, frag)                                          \
{                                                                                                    \
    assert(BTL_PORTALS4_FRAG_TYPE_USER == (frag)->type);                                             \
    OMPI_FREE_LIST_RETURN_MT(&((mca_btl_portals4_module_t*)(btl_macro))->portals_frag_user,          \
                             (ompi_free_list_item_t*)(frag));                                        \
}
END_C_DECLS
#endif

95
ompi/mca/btl/portals4/btl_portals4_rdma.c Обычный файл
Просмотреть файл

@ -0,0 +1,95 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "btl_portals4.h"
/**
 * RDMA put entry point.  Not implemented: the function prints a message
 * and aborts the job with MPI_Abort.
 *
 * @param btl_base    unused
 * @param btl_peer    unused
 * @param descriptor  unused
 *
 * @return OMPI_SUCCESS, which is only reached if MPI_Abort returns.
 */
int
mca_btl_portals4_put(struct mca_btl_base_module_t* btl_base,
                     struct mca_btl_base_endpoint_t* btl_peer,
                     struct mca_btl_base_descriptor_t* descriptor)
{
    /* report the missing feature, then bring the whole job down */
    opal_output(0, "mca_btl_portals4_put not implemented\n");
    MPI_Abort(MPI_COMM_WORLD, 10);

    return OMPI_SUCCESS;
}
int
mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor)
{
mca_btl_portals4_segment_t *src_seg = (mca_btl_portals4_segment_t *) descriptor->des_src;
mca_btl_portals4_frag_t *frag = (mca_btl_portals4_frag_t*) descriptor;
ptl_md_t md;
int ret;
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
"mca_btl_portals4_get frag=%p src_seg=%p frag->md_h=%d\n", (void *)frag, (void *)src_seg, frag->md_h));
assert(&mca_btl_portals4_module == (mca_btl_portals4_module_t*) btl_base);
frag->endpoint = btl_peer;
frag->hdr.tag = MCA_BTL_TAG_MAX;
/* Bind the memory */
md.start = (void *)frag->segments[0].base.seg_addr.pval;
md.length = frag->segments[0].base.seg_len;
md.options = 0;
md.eq_handle = mca_btl_portals4_module.recv_eq_h;
md.ct_handle = PTL_CT_NONE;
ret = PtlMDBind(mca_btl_portals4_module.portals_ni_h,
&md,
&frag->md_h);
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d",
__FILE__, __LINE__, ret);
return OMPI_ERROR;
}
frag->match_bits = src_seg->key;
frag->length = md.length;
frag->peer_proc = btl_peer->ptl_proc;
ret = PtlGet(frag->md_h,
0,
md.length,
btl_peer->ptl_proc,
mca_btl_portals4_module.recv_idx,
frag->match_bits, /* match bits */
0,
frag);
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_btl_base_framework.framework_output,
"%s:%d: PtlGet failed: %d",
__FILE__, __LINE__, ret);
PtlMDRelease(frag->md_h);
frag->md_h = PTL_INVALID_HANDLE;
return OMPI_ERROR;
}
OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));
return OMPI_SUCCESS;
}

102
ompi/mca/btl/portals4/btl_portals4_recv.c Обычный файл
Просмотреть файл

@ -0,0 +1,102 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "btl_portals4.h"
#include "btl_portals4_recv.h"
#include "btl_portals4_frag.h"
/* Receive block class: a list item without constructor or destructor;
   the fields are filled in by mca_btl_portals4_recv_block_init(). */
OBJ_CLASS_INSTANCE(mca_btl_portals4_recv_block_t, opal_list_item_t, NULL, NULL);
/**
 * Create portals_recv_mds_num receive blocks, queue each of them on the
 * module's portals_recv_blocks list and activate it.
 *
 * @param btl  module that owns the blocks
 *
 * @return OMPI_SUCCESS, or OMPI_ERROR when a block cannot be created; in
 *         that case mca_btl_portals4_recv_disable() is called first.
 */
int
mca_btl_portals4_recv_enable(mca_btl_portals4_module_t *btl)
{
    int created = 0;

    while (created < btl->portals_recv_mds_num) {
        mca_btl_portals4_recv_block_t *new_block = mca_btl_portals4_recv_block_init(btl);

        if (NULL == new_block) {
            /* undo what has been set up so far */
            mca_btl_portals4_recv_disable(btl);
            return OMPI_ERROR;
        }

        opal_list_append(&(btl->portals_recv_blocks), (opal_list_item_t*) new_block);
        /* NOTE(review): the result of the activation is not checked */
        mca_btl_portals4_activate_block(new_block);

        ++created;
    }

    return OMPI_SUCCESS;
}
/**
 * Empty the module's portals_recv_blocks list, passing every block that
 * is removed to mca_btl_portals4_recv_block_free().
 *
 * @param btl  module that owns the blocks
 *
 * @return OMPI_SUCCESS in all cases.
 */
int
mca_btl_portals4_recv_disable(mca_btl_portals4_module_t *btl)
{
    opal_list_item_t *entry;

    /* nothing queued: nothing to do */
    if (!(opal_list_get_size(&btl->portals_recv_blocks) > 0)) {
        return OMPI_SUCCESS;
    }

    for (entry = opal_list_remove_first(&btl->portals_recv_blocks);
         NULL != entry;
         entry = opal_list_remove_first(&btl->portals_recv_blocks)) {
        mca_btl_portals4_recv_block_free((mca_btl_portals4_recv_block_t*) entry);
    }

    return OMPI_SUCCESS;
}
/**
 * Create a receive block with a buffer of portals_recv_mds_size bytes.
 * The block is returned inactive: both Portals handles are invalid,
 * full is false and pending is 0.
 *
 * The buffer is allocated before the block object so that a failure of
 * either allocation leaves nothing behind.
 *
 * @param btl  module the block belongs to
 *
 * @return the new block, or NULL if the buffer or the block object
 *         cannot be allocated.
 */
mca_btl_portals4_recv_block_t*
mca_btl_portals4_recv_block_init(mca_btl_portals4_module_t *btl)
{
    mca_btl_portals4_recv_block_t *block;
    void *buffer;

    buffer = malloc(btl->portals_recv_mds_size);
    if (NULL == buffer) return NULL;

    block = OBJ_NEW(mca_btl_portals4_recv_block_t);
    if (NULL == block) {
        free(buffer);
        return NULL;
    }

    block->btl = btl;
    block->length = btl->portals_recv_mds_size;
    block->start = buffer;
    block->me_h = PTL_INVALID_HANDLE;
    block->md_h = PTL_INVALID_HANDLE;
    block->full = false;
    block->pending = 0;

    return block;
}
/**
 * Release the buffer of a receive block and reset its length and full
 * flag.  The block object itself is left untouched.
 *
 * @param block  block whose buffer is released
 *
 * @return OMPI_SUCCESS in all cases.
 */
int
mca_btl_portals4_recv_block_free(mca_btl_portals4_recv_block_t *block)
{
    /* free() accepts NULL, so no test is needed before the call */
    free(block->start);
    block->start = NULL;

    block->full = false;
    block->length = 0;

    return OMPI_SUCCESS;
}

118
ompi/mca/btl/portals4/btl_portals4_recv.h Обычный файл
Просмотреть файл

@ -0,0 +1,118 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_BTL_PORTALS4_RECV_H
#define OMPI_BTL_PORTALS4_RECV_H
#include "btl_portals4_frag.h"
/**
 * Receive block: a buffer handed to Portals to receive incoming send
 * fragments.  Blocks are kept on the module's portals_recv_blocks list.
 */
typedef struct mca_btl_portals4_recv_block_t {
    /** list linkage */
    opal_list_item_t base;
    /** module the block belongs to */
    mca_btl_portals4_module_t *btl;
    /** start of the buffer, NULL once it has been freed */
    void *start;
    /** size of the buffer in bytes */
    size_t length;
    /** handle of the match entry appended for this block */
    ptl_handle_me_t me_h;
    /** memory descriptor handle, initialised to PTL_INVALID_HANDLE */
    ptl_handle_md_t md_h;
    /** reset to false whenever the block is (re)activated */
    volatile bool full;
    /** reset to 0 whenever the block is (re)activated */
    volatile int32_t pending;
} mca_btl_portals4_recv_block_t;
OBJ_CLASS_DECLARATION(mca_btl_portals4_recv_block_t);
/**
 * Set up the receive side of the module.  Blocks are appended to
 * btl->portals_recv_blocks and activated.
 */
int mca_btl_portals4_recv_enable(mca_btl_portals4_module_t *btl);
/**
 * Tear down the receive side of the module: every block on
 * btl->portals_recv_blocks is removed from the list and passed to
 * mca_btl_portals4_recv_block_free().  Returns OMPI_SUCCESS.
 */
int mca_btl_portals4_recv_disable(mca_btl_portals4_module_t *btl);
/**
 * Free the data buffer of a receive block and reset its start pointer,
 * length and full flag.  The block object itself is not released by
 * this call.  Returns OMPI_SUCCESS.
 */
int mca_btl_portals4_recv_block_free(mca_btl_portals4_recv_block_t *block);
/**
 * Create a block of memory for receiving send messages. Must call
 * activate_block on the returned block of memory before it will be
 * active with the Portals library.
 *
 * The buffer size is taken from btl->portals_recv_mds_size.  Returns
 * NULL if the buffer cannot be allocated.
 *
 * Module lock must be held before calling this function
 */
mca_btl_portals4_recv_block_t*
mca_btl_portals4_recv_block_init(mca_btl_portals4_module_t *btl);
/**
 * Post a receive block to Portals.
 *
 * The block's pending counter and full flag are reset and its buffer is
 * appended as a match list entry to the priority list of the module's
 * receive portal table index, with the block itself as the user pointer.
 * The entry matches BTL_PORTALS4_SHORT_MSG and ignores the context,
 * source and tag bits.  Blocks that are full (have gone inactive) can be
 * re-activated with this call.  There is no need to hold the lock before
 * calling this function.
 *
 * @param block  block to post; its buffer must still be allocated
 * @return       OMPI_SUCCESS, or OMPI_ERROR if the block has no buffer
 *               or PtlMEAppend fails
 */
static inline int
mca_btl_portals4_activate_block(mca_btl_portals4_recv_block_t *block)
{
    ptl_me_t entry;
    int rc;

    /* A block without a buffer cannot be posted. */
    if (NULL == block->start) {
        return OMPI_ERROR;
    }

    /* Where incoming data lands. */
    entry.start = block->start;
    entry.length = block->length;
    entry.min_free = mca_btl_portals4_module.super.btl_eager_limit;

    /* Who may write and what is matched: any user, any process,
     * short messages only. */
    entry.uid = PTL_UID_ANY;
    entry.match_id.phys.nid = PTL_NID_ANY;
    entry.match_id.phys.pid = PTL_PID_ANY;
    entry.match_bits = BTL_PORTALS4_SHORT_MSG;
    entry.ignore_bits = BTL_PORTALS4_CONTEXT_MASK |
                        BTL_PORTALS4_SOURCE_MASK |
                        BTL_PORTALS4_TAG_MASK;

    entry.ct_handle = PTL_CT_NONE;
    entry.options = PTL_ME_OP_PUT |
                    PTL_ME_MANAGE_LOCAL |
                    PTL_ME_EVENT_LINK_DISABLE |
                    PTL_ME_MAY_ALIGN;

    /* Reset the bookkeeping and make it globally visible before the
     * entry is handed to Portals. */
    block->full = false;
    block->pending = 0;
    opal_atomic_mb();

    rc = PtlMEAppend(mca_btl_portals4_module.portals_ni_h,
                     mca_btl_portals4_module.recv_idx,
                     &entry,
                     PTL_PRIORITY_LIST,
                     block,
                     &block->me_h);
    if (OPAL_UNLIKELY(PTL_OK != rc)) {
        opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d",
                            __FILE__, __LINE__, rc);
        return OMPI_ERROR;
    }

    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "PtlMEAppend (recv) block=%p me_h=%d start=%p len=%x\n", (void *)block, block->me_h, block->start, (unsigned int) block->length));

    return OMPI_SUCCESS;
}
#endif /* OMPI_BTL_PORTALS4_RECV_H */

169
ompi/mca/btl/portals4/btl_portals4_send.c Обычный файл
Просмотреть файл

@ -0,0 +1,169 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "opal/datatype/opal_convertor.h"
#include "btl_portals4.h"
/*
 * Forward declaration; the definition is at the end of this file.
 * Selects the memory descriptor (and the offset inside it) used to send
 * the buffer [start, start + length): one of the module's fixed MDs when
 * the buffer fits in it, otherwise an MD bound for this fragment.
 */
static int mca_btl_portals4_try_to_use_fixed_md(void *start,
int length,
ptl_handle_md_t *md_h,
int64_t *offset,
mca_btl_portals4_frag_t *frag);
/**
 * Send the first segment of a fragment to a peer with PtlPut.
 *
 * The message is flagged long or short in the match bits depending on
 * whether it exceeds the module's eager limit; the tag travels both in
 * the match bits and as the put's header data.  One slot of the module's
 * outstanding-operation budget is reserved before the put is issued.
 *
 * @param btl_base    BTL module (unused; the global module is used)
 * @param endpoint    destination endpoint, recorded in the fragment
 * @param descriptor  fragment to send (a mca_btl_portals4_frag_t)
 * @param tag         BTL tag, recorded in the fragment header
 * @return            OMPI_SUCCESS once the put has been issued; the error
 *                    from the MD lookup/bind if that fails; OMPI_ERROR if
 *                    PtlPut fails.  On every error return the reserved
 *                    slot is given back.
 */
int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
                          struct mca_btl_base_endpoint_t* endpoint,
                          struct mca_btl_base_descriptor_t* descriptor,
                          mca_btl_base_tag_t tag)
{
    mca_btl_portals4_frag_t *frag = (mca_btl_portals4_frag_t*) descriptor;
    ptl_match_bits_t match_bits, msglen_type;
    ptl_size_t put_length, put_local_offset;
    int64_t offset;
    ptl_handle_md_t md_h;
    int ret;

    frag->endpoint = endpoint;
    frag->hdr.tag = tag;

    /* Address and length of the (single) segment that is sent. */
    put_local_offset = (ptl_size_t) frag->segments[0].base.seg_addr.pval;
    put_length = frag->segments[0].base.seg_len;

    if (put_length > mca_btl_portals4_module.super.btl_eager_limit) {
        msglen_type = BTL_PORTALS4_LONG_MSG;
    } else {
        msglen_type = BTL_PORTALS4_SHORT_MSG;
    }

    BTL_PORTALS4_SET_SEND_BITS(match_bits, 0, 0, tag, msglen_type);

    /* Reserve one slot of the outstanding-operation budget.  While the
     * budget is exhausted, undo the reservation and drive progress so
     * that earlier operations can complete, then try again. */
    while (OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, 1) >
           mca_btl_portals4_module.portals_max_outstanding_ops) {
        OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
        OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (4)\n"));
        mca_btl_portals4_component_progress();
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "mca_btl_portals4_send: Incrementing portals_outstanding_ops=%d\n",
                         mca_btl_portals4_module.portals_outstanding_ops));

    ret = mca_btl_portals4_try_to_use_fixed_md((void*)put_local_offset, put_length, &md_h, &offset, frag);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        /* No put was issued, so no completion will ever return the slot
         * reserved above: give it back here. */
        OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
        return ret;
    }

    /* Arguments are cast so that they match the %ld / %lx specifiers. */
    OPAL_OUTPUT_VERBOSE((50, ompi_btl_base_framework.framework_output,
                         "PtlPut frag=%p pid=%x tag=%x addr=%p len=%ld match_bits=%lx\n",
                         (void*)frag, endpoint->ptl_proc.phys.pid, tag,
                         (void *)put_local_offset, (long) put_length,
                         (unsigned long) match_bits));

    ret = PtlPut(md_h,
                 (ptl_size_t) offset,
                 put_length, /* fragment length */
                 (mca_btl_portals4_component.portals_need_ack ? PTL_ACK_REQ : PTL_NO_ACK_REQ),
                 endpoint->ptl_proc,
                 mca_btl_portals4_module.recv_idx,
                 match_bits, /* match bits */
                 0, /* remote offset - not used */
                 (void *) frag, /* user ptr */
                 tag); /* hdr_data: tag */
    if (ret != PTL_OK) {
        opal_output(0, "mca_btl_portals4_send: PtlPut failed with error %d", ret);
        /* The put was not issued: return the reserved slot.
         * NOTE(review): if the helper bound a per-fragment MD
         * (frag->md_h), it is presumably still bound at this point --
         * confirm whether the fragment free path releases it. */
        OPAL_THREAD_ADD32(&mca_btl_portals4_module.portals_outstanding_ops, -1);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
/**
 * NOT IMPLEMENTED: immediate send.
 *
 * Calling this function is a fatal error: it reports that the entry
 * point is missing and aborts the process.  None of the arguments are
 * used.
 */
int mca_btl_portals4_sendi(struct mca_btl_base_module_t* btl_base,
                           struct mca_btl_base_endpoint_t* endpoint,
                           struct opal_convertor_t* convertor,
                           void* header,
                           size_t header_size,
                           size_t payload_size,
                           uint8_t order,
                           uint32_t flags,
                           mca_btl_base_tag_t tag,
                           mca_btl_base_descriptor_t** des)
{
    /* Name this component's function in the message (the original text
     * named mca_btl_portals_sendi, which belongs to a different BTL). */
    opal_output(0, "mca_btl_portals4_sendi is not implemented");
    abort();

    /* Not reached; keeps compilers that do not know abort() never
     * returns from complaining about a missing return value. */
    return OMPI_SUCCESS;
}
/**
 * Choose the memory descriptor used to send [start, start + length).
 *
 * When the module has fixed MDs (fixed_md_distance != 0) and the buffer
 * lies strictly inside one of them, that MD is returned together with
 * the buffer's offset inside it, and frag->md_h is set to
 * PTL_INVALID_HANDLE.  Otherwise a new MD covering exactly the buffer is
 * bound, stored in frag->md_h and returned with offset 0.
 *
 * @param start   first byte of the buffer
 * @param length  buffer length in bytes
 * @param md_h    out: MD handle to pass to the put
 * @param offset  out: offset of the buffer inside *md_h
 * @param frag    fragment that owns a freshly bound MD, if any
 * @return        OMPI_SUCCESS, or the translated Portals error if
 *                PtlMDBind fails
 */
static int
mca_btl_portals4_try_to_use_fixed_md(void *start,
                                     int length,
                                     ptl_handle_md_t *md_h,
                                     int64_t *offset,
                                     mca_btl_portals4_frag_t *frag)
{
    ptl_md_t new_md;
    int rc;
    int64_t masked_addr;

    masked_addr = ((int64_t)start & ~EXTENDED_ADDR);

    /* The distance test comes first so that the modulo on its right is
     * never evaluated with a zero divisor. */
    if ((0 == mca_btl_portals4_module.fixed_md_distance) ||
        (((masked_addr % mca_btl_portals4_module.fixed_md_distance) + length) >= mca_btl_portals4_module.fixed_md_distance)) {
        /* No fixed MD can serve this buffer. */
        if (0 == mca_btl_portals4_module.fixed_md_distance) {
            OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                                 "\nWARNING: Memory cannot be connected to a fixed MD\n"));
        } else {
            OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                                 "\nWARNING: Memory outside the scope of the fixed MD %d\n",
                                 (int)(masked_addr / mca_btl_portals4_module.fixed_md_distance)));
        }

        /* Bind the MD (and unbind it where necessary) */
        new_md.start = start;
        new_md.length = length;
        new_md.options = 0;
        new_md.eq_handle = mca_btl_portals4_module.recv_eq_h;
        new_md.ct_handle = PTL_CT_NONE;

        rc = PtlMDBind(mca_btl_portals4_module.portals_ni_h,
                       &new_md,
                       &frag->md_h);
        if (OPAL_UNLIKELY(PTL_OK != rc)) {
            opal_output_verbose(1, ompi_btl_base_framework.framework_output,
                                "%s:%d: PtlMDBind failed: %d\n",
                                __FILE__, __LINE__, rc);
            return mca_btl_portals4_get_error(rc);
        }

        *offset = 0;
        *md_h = frag->md_h;
    } else {
        /* The buffer is strictly contained in one fixed MD. */
        if (0 == length) {
            OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                                 " Memory : [ %16lx - (len = 0) ] is in fixed MD number: %d\n",
                                 (unsigned long) start, (int) (masked_addr / mca_btl_portals4_module.fixed_md_distance)));
        } else {
            OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output,
                                 " Memory : [ %16lx - %16lx ] is in fixed MD number: %d\n",
                                 (unsigned long) start, (long int)start + length - 1, (int)(masked_addr / mca_btl_portals4_module.fixed_md_distance)));
        }

        /* Use the fixed MD; the fragment owns no MD of its own. */
        frag->md_h = PTL_INVALID_HANDLE;
        *offset = (masked_addr % mca_btl_portals4_module.fixed_md_distance);
        *md_h = mca_btl_portals4_module.fixed_md_h[masked_addr / mca_btl_portals4_module.fixed_md_distance];
    }

    OPAL_OUTPUT_VERBOSE((90, ompi_btl_base_framework.framework_output, "try_to_use_fixed_md: frag=%p start=%p len=%lx offset=%lx\n",
                         (void*)frag, (void *)start, (unsigned long)length, (unsigned long)*offset));

    return OMPI_SUCCESS;
}

63
ompi/mca/btl/portals4/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,63 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2010 Sandia National Laboratories. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ompi_btl_portals4_CONFIG(action-if-can-compile,
# [action-if-cant-compile])
# ------------------------------------------------
# Configure the Portals 4 BTL: probe for Portals 4, run the first
# argument when it is usable and the second when it is not, process the
# flow-control option and export the build flags to the Makefile.
AC_DEFUN([MCA_ompi_btl_portals4_CONFIG],[
AC_CONFIG_FILES([ompi/mca/btl/portals4/Makefile])
# probe for Portals 4; the result is recorded in btl_portals4_happy
OMPI_CHECK_PORTALS4([btl_portals4],
[btl_portals4_happy="yes"],
[btl_portals4_happy="no"])
# on success, copy the link flags into the wrapper-compiler variables
AS_IF([test "$btl_portals4_happy" = "yes"],
[btl_portals4_WRAPPER_EXTRA_LDFLAGS="$btl_portals4_LDFLAGS"
btl_portals4_WRAPPER_EXTRA_LIBS="$btl_portals4_LIBS"
$1],
[$2])
# need to propagate CPPFLAGS to all of OMPI
AS_IF([test "$DIRECT_btl" = "portals4"],
[CPPFLAGS="$CPPFLAGS $btl_portals4_CPPFLAGS"])
AC_ARG_ENABLE([btl-portals4-flow-control],
[AC_HELP_STRING([--enable-btl-portals4-flow-control],
[enable flow control for Portals 4 BTL (default: disabled)])])
AC_MSG_CHECKING([whether to enable flow control])
# NOTE(review): both branches below report "no" and leave flow control
# at 0, so --enable-btl-portals4-flow-control currently has no effect.
# Presumably intentional while BTL flow control is unimplemented --
# confirm before making the else branch answer yes / 1.
if test "$enable_btl_portals4_flow_control" != "yes"; then
AC_MSG_RESULT([no])
btl_portals4_flow_control_enabled=0
else
AC_MSG_RESULT([no])
btl_portals4_flow_control_enabled=0
fi
# expose the setting both as a C preprocessor define and as an
# Automake conditional
AC_DEFINE_UNQUOTED([OMPI_BTL_PORTALS4_FLOW_CONTROL],
[$btl_portals4_flow_control_enabled],
[Enable flow control for Portals4 BTL])
AM_CONDITIONAL([OMPI_BTL_PORTALS4_FLOW_CONTROL],
[test "$btl_portals4_flow_control_enabled" = "1"])
# substitute in the things needed to build portals4
AC_SUBST([btl_portals4_CPPFLAGS])
AC_SUBST([btl_portals4_LDFLAGS])
AC_SUBST([btl_portals4_LIBS])
])dnl