346 строки
13 KiB
C
346 строки
13 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2014      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
|
|
|
|
#include "opal_config.h"
|
|
|
|
#include "btl_scif.h"
|
|
#include "btl_scif_frag.h"
|
|
#include "btl_scif_endpoint.h"
|
|
|
|
static int
|
|
mca_btl_scif_free (struct mca_btl_base_module_t *btl,
|
|
mca_btl_base_descriptor_t *des);
|
|
|
|
static int
|
|
mca_btl_scif_module_finalize (struct mca_btl_base_module_t* btl);
|
|
|
|
static mca_btl_base_descriptor_t *
|
|
mca_btl_scif_prepare_dst (mca_btl_base_module_t *btl,
|
|
mca_btl_base_endpoint_t *endpoint,
|
|
mca_mpool_base_registration_t *registration,
|
|
opal_convertor_t *convertor, uint8_t order,
|
|
size_t reserve, size_t *size, uint32_t flags);
|
|
|
|
static struct mca_btl_base_descriptor_t *
|
|
mca_btl_scif_prepare_src (struct mca_btl_base_module_t *btl,
|
|
struct mca_btl_base_endpoint_t *endpoint,
|
|
mca_mpool_base_registration_t *registration,
|
|
struct opal_convertor_t *convertor,
|
|
uint8_t order, size_t reserve, size_t *size,
|
|
uint32_t flags);
|
|
|
|
mca_btl_scif_module_t mca_btl_scif_module = {
|
|
.super = {
|
|
.btl_component = &mca_btl_scif_component.super,
|
|
.btl_add_procs = mca_btl_scif_add_procs,
|
|
.btl_del_procs = mca_btl_scif_del_procs,
|
|
.btl_finalize = mca_btl_scif_module_finalize,
|
|
.btl_alloc = mca_btl_scif_alloc,
|
|
.btl_free = mca_btl_scif_free,
|
|
.btl_prepare_src = mca_btl_scif_prepare_src,
|
|
.btl_prepare_dst = mca_btl_scif_prepare_dst,
|
|
.btl_send = mca_btl_scif_send,
|
|
.btl_sendi = mca_btl_scif_sendi,
|
|
.btl_put = mca_btl_scif_put,
|
|
.btl_get = mca_btl_scif_get,
|
|
}
|
|
};
|
|
|
|
int mca_btl_scif_module_init (void)
|
|
{
|
|
int rc;
|
|
|
|
/* create an endpoint to listen for connections */
|
|
mca_btl_scif_module.scif_fd = scif_open ();
|
|
if (-1 == mca_btl_scif_module.scif_fd) {
|
|
BTL_VERBOSE(("scif_open failed. errno = %d", errno));
|
|
return OPAL_ERROR;
|
|
}
|
|
|
|
/* bind the endpoint to a port */
|
|
mca_btl_scif_module.port_id.port = scif_bind (mca_btl_scif_module.scif_fd, 0);
|
|
if (-1 == mca_btl_scif_module.port_id.port) {
|
|
BTL_VERBOSE(("scif_bind failed. errno = %d", errno));
|
|
scif_close (mca_btl_scif_module.scif_fd);
|
|
mca_btl_scif_module.scif_fd = -1;
|
|
return OPAL_ERROR;
|
|
}
|
|
|
|
/* determine this processes node id */
|
|
rc = scif_get_nodeIDs (NULL, 0, &mca_btl_scif_module.port_id.node);
|
|
if (-1 == rc) {
|
|
BTL_VERBOSE(("btl/scif error getting node id of this node"));
|
|
return OPAL_ERROR;
|
|
}
|
|
|
|
/* Listen for connections */
|
|
/* TODO - base the maximum backlog off something */
|
|
rc = scif_listen (mca_btl_scif_module.scif_fd, 64);
|
|
if (-1 == rc) {
|
|
BTL_VERBOSE(("scif_listen failed. errno = %d", errno));
|
|
scif_close (mca_btl_scif_module.scif_fd);
|
|
mca_btl_scif_module.scif_fd = -1;
|
|
return OPAL_ERROR;
|
|
}
|
|
|
|
BTL_VERBOSE(("btl/scif: listening @ port %u on node %u\n",
|
|
mca_btl_scif_module.port_id.port, mca_btl_scif_module.port_id.node));
|
|
|
|
OBJ_CONSTRUCT(&mca_btl_scif_module.dma_frags, ompi_free_list_t);
|
|
OBJ_CONSTRUCT(&mca_btl_scif_module.eager_frags, ompi_free_list_t);
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
static int
|
|
mca_btl_scif_module_finalize (struct mca_btl_base_module_t *btl)
|
|
{
|
|
mca_btl_scif_module_t *scif_module = (mca_btl_scif_module_t *) btl;
|
|
unsigned int i;
|
|
|
|
OBJ_DESTRUCT(&mca_btl_scif_module.dma_frags);
|
|
OBJ_DESTRUCT(&mca_btl_scif_module.eager_frags);
|
|
|
|
mca_btl_scif_module.exiting = true;
|
|
|
|
/* close all open connections and release endpoints */
|
|
if (NULL != scif_module->endpoints) {
|
|
for (i = 0 ; i < scif_module->endpoint_count ; ++i) {
|
|
mca_btl_scif_ep_release (scif_module->endpoints + i);
|
|
}
|
|
|
|
free (scif_module->endpoints);
|
|
|
|
scif_module->endpoint_count = 0;
|
|
scif_module->endpoints = NULL;
|
|
}
|
|
|
|
/* close the listening endpoint */
|
|
if (mca_btl_scif_module.listening && -1 != mca_btl_scif_module.scif_fd) {
|
|
/* wake up the scif thread */
|
|
scif_epd_t tmpfd;
|
|
tmpfd = scif_open();
|
|
scif_connect (tmpfd, &mca_btl_scif_module.port_id);
|
|
pthread_join(mca_btl_scif_module.listen_thread, NULL);
|
|
scif_close(tmpfd);
|
|
scif_close (mca_btl_scif_module.scif_fd);
|
|
}
|
|
|
|
mca_btl_scif_module.scif_fd = -1;
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
mca_btl_base_descriptor_t *
|
|
mca_btl_scif_alloc(struct mca_btl_base_module_t *btl,
|
|
struct mca_btl_base_endpoint_t *endpoint,
|
|
uint8_t order, size_t size, uint32_t flags)
|
|
{
|
|
mca_btl_scif_base_frag_t *frag = NULL;
|
|
|
|
BTL_VERBOSE(("allocating fragment of size: %u", (unsigned int)size));
|
|
|
|
if (size <= mca_btl_scif_module.super.btl_eager_limit) {
|
|
(void) MCA_BTL_SCIF_FRAG_ALLOC_EAGER(endpoint, frag);
|
|
}
|
|
|
|
if (OPAL_UNLIKELY(NULL == frag)) {
|
|
return NULL;
|
|
}
|
|
|
|
BTL_VERBOSE(("btl/scif_module allocated frag of size: %u, flags: %x. frag = %p",
|
|
(unsigned int)size, flags, (void *) frag));
|
|
|
|
frag->base.des_flags = flags;
|
|
frag->base.order = order;
|
|
frag->base.des_local = &frag->segments[0].base;
|
|
frag->base.des_local_count = 1;
|
|
|
|
frag->segments[0].base.seg_len = size;
|
|
|
|
return &frag->base;
|
|
}
|
|
|
|
static int
|
|
mca_btl_scif_free (struct mca_btl_base_module_t *btl,
|
|
mca_btl_base_descriptor_t *des)
|
|
{
|
|
return mca_btl_scif_frag_return ((mca_btl_scif_base_frag_t *) des);
|
|
}
|
|
|
|
static inline mca_btl_base_descriptor_t *mca_btl_scif_prepare_dma (struct mca_btl_base_module_t *btl,
|
|
mca_btl_base_endpoint_t *endpoint,
|
|
void *data_ptr, size_t size,
|
|
mca_mpool_base_registration_t *registration,
|
|
uint8_t order, uint32_t flags)
|
|
{
|
|
mca_btl_scif_base_frag_t *frag;
|
|
mca_btl_scif_reg_t *scif_reg;
|
|
int rc;
|
|
|
|
if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
|
|
/* the endpoint needs to be connected before the fragment can be
|
|
* registered. */
|
|
rc = mca_btl_scif_ep_connect (endpoint);
|
|
if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
|
|
/* not yet connected */
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
(void) MCA_BTL_SCIF_FRAG_ALLOC_DMA(endpoint, frag);
|
|
if (OPAL_UNLIKELY(NULL == frag)) {
|
|
return NULL;
|
|
}
|
|
|
|
if (NULL == registration) {
|
|
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, data_ptr, size, 0,
|
|
(mca_mpool_base_registration_t **) ®istration);
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
|
mca_btl_scif_frag_return (frag);
|
|
return NULL;
|
|
}
|
|
|
|
frag->registration = (mca_btl_scif_reg_t *) registration;
|
|
}
|
|
|
|
scif_reg = (mca_btl_scif_reg_t *) registration;
|
|
|
|
/* register the memory location with this peer if it isn't already */
|
|
if ((off_t) -1 == scif_reg->registrations[endpoint->id]) {
|
|
size_t seg_size = (size_t)((uintptr_t) registration->bound - (uintptr_t) registration->base) + 1;
|
|
scif_reg->registrations[endpoint->id] = scif_register (endpoint->scif_epd, registration->base,
|
|
seg_size, 0, SCIF_PROT_READ |
|
|
SCIF_PROT_WRITE, 0);
|
|
BTL_VERBOSE(("registered fragment for scif DMA transaction. offset = %lu",
|
|
(unsigned long) scif_reg->registrations[endpoint->id]));
|
|
}
|
|
|
|
if (OPAL_UNLIKELY((off_t) -1 == scif_reg->registrations[endpoint->id])) {
|
|
mca_btl_scif_frag_return (frag);
|
|
return NULL;
|
|
}
|
|
|
|
frag->segments[0].base.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
|
frag->segments[0].base.seg_len = size;
|
|
frag->segments[0].scif_offset = scif_reg->registrations[endpoint->id] +
|
|
(off_t) ((ptrdiff_t) data_ptr - (ptrdiff_t) registration->base);
|
|
/* save the original pointer so the offset can be adjusted if needed (this is
|
|
* required for osc/rdma) */
|
|
frag->segments[0].orig_ptr = (uint64_t)(uintptr_t) data_ptr;
|
|
frag->base.order = order;
|
|
frag->base.des_flags = flags;
|
|
|
|
frag->base.des_local = &frag->segments->base;
|
|
frag->base.des_local_count = 1;
|
|
|
|
return &frag->base;
|
|
}
|
|
|
|
static inline mca_btl_base_descriptor_t *mca_btl_scif_prepare_dma_conv (struct mca_btl_base_module_t *btl,
|
|
mca_btl_base_endpoint_t *endpoint,
|
|
mca_mpool_base_registration_t *registration,
|
|
struct opal_convertor_t *convertor,
|
|
uint8_t order, size_t *size,
|
|
uint32_t flags)
|
|
{
|
|
void *data_ptr;
|
|
|
|
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
|
|
|
return mca_btl_scif_prepare_dma (btl, endpoint, data_ptr, *size, registration, order, flags);
|
|
}
|
|
|
|
static inline struct mca_btl_base_descriptor_t *
|
|
mca_btl_scif_prepare_src_send (struct mca_btl_base_module_t *btl,
|
|
mca_btl_base_endpoint_t *endpoint,
|
|
struct opal_convertor_t *convertor,
|
|
uint8_t order, size_t reserve, size_t *size,
|
|
uint32_t flags)
|
|
{
|
|
mca_btl_scif_base_frag_t *frag = NULL;
|
|
uint32_t iov_count = 1;
|
|
struct iovec iov;
|
|
size_t max_size = *size;
|
|
int rc;
|
|
|
|
if (OPAL_LIKELY((mca_btl_scif_module.super.btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) &&
|
|
!opal_convertor_need_buffers (convertor) &&
|
|
reserve <= 128)) {
|
|
/* inplace send */
|
|
void *data_ptr;
|
|
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
|
|
|
(void) MCA_BTL_SCIF_FRAG_ALLOC_DMA(endpoint, frag);
|
|
if (OPAL_UNLIKELY(NULL == frag)) {
|
|
return NULL;
|
|
}
|
|
|
|
frag->segments[0].base.seg_len = reserve;
|
|
frag->segments[1].base.seg_addr.pval = data_ptr;
|
|
frag->segments[1].base.seg_len = *size;
|
|
frag->base.des_local_count = 2;
|
|
} else {
|
|
/* buffered send */
|
|
(void) MCA_BTL_SCIF_FRAG_ALLOC_EAGER(endpoint, frag);
|
|
if (OPAL_UNLIKELY(NULL == frag)) {
|
|
return NULL;
|
|
}
|
|
|
|
if (*size) {
|
|
iov.iov_len = *size;
|
|
iov.iov_base = (IOVBASE_TYPE *) ((uintptr_t) frag->segments[0].base.seg_addr.pval + reserve);
|
|
|
|
rc = opal_convertor_pack (convertor, &iov, &iov_count, &max_size);
|
|
if (OPAL_UNLIKELY(rc < 0)) {
|
|
mca_btl_scif_frag_return (frag);
|
|
return NULL;
|
|
}
|
|
*size = max_size;
|
|
}
|
|
|
|
frag->segments[0].base.seg_len = reserve + *size;
|
|
frag->base.des_local_count = 1;
|
|
}
|
|
|
|
frag->base.des_local = &frag->segments->base;
|
|
frag->base.order = order;
|
|
frag->base.des_flags = flags;
|
|
|
|
return &frag->base;
|
|
}
|
|
|
|
static mca_btl_base_descriptor_t *mca_btl_scif_prepare_src (struct mca_btl_base_module_t *btl,
|
|
mca_btl_base_endpoint_t *endpoint,
|
|
mca_mpool_base_registration_t *registration,
|
|
struct opal_convertor_t *convertor,
|
|
uint8_t order, size_t reserve, size_t *size,
|
|
uint32_t flags)
|
|
{
|
|
if (OPAL_LIKELY(reserve)) {
|
|
return mca_btl_scif_prepare_src_send (btl, endpoint, convertor,
|
|
order, reserve, size, flags);
|
|
} else {
|
|
return mca_btl_scif_prepare_dma_conv (btl, endpoint, registration, convertor, order, size, flags);
|
|
}
|
|
}
|
|
|
|
static mca_btl_base_descriptor_t *mca_btl_scif_prepare_dst (mca_btl_base_module_t *btl,
|
|
mca_btl_base_endpoint_t *endpoint,
|
|
mca_mpool_base_registration_t *registration,
|
|
opal_convertor_t *convertor, uint8_t order,
|
|
size_t reserve, size_t *size, uint32_t flags)
|
|
{
|
|
return mca_btl_scif_prepare_dma_conv (btl, endpoint, registration, convertor, order, size, flags);
|
|
}
|