1
1

* checkpoint - added a bunch of infrastructure for sends

This commit was SVN r6353.
Этот коммит содержится в:
Brian Barrett 2005-07-05 21:14:29 +00:00
родитель 3d9d67eae9
Коммит d4bd7252a0
13 изменённых файлов: 348 добавлений и 66 удалений

Просмотреть файл

@ -41,6 +41,7 @@ portals_SOURCES = \
src/btl_portals_send.h \
src/btl_portals_recv.h \
src/btl_portals_rdma.h \
src/btl_portals_endpoint.h \
src/btl_portals.c \
src/btl_portals_component.c \
src/btl_portals_frag.c \
@ -48,6 +49,7 @@ portals_SOURCES = \
src/btl_portals_recv.c \
src/btl_portals_rdma.c \
src/btl_portals_stubs.c \
src/btl_portals_endpoint.c \
src/btl_portals_compat_utcp.c

Просмотреть файл

@ -29,6 +29,9 @@
#include "btl_portals.h"
#include "btl_portals_compat.h"
#include "btl_portals_endpoint.h"
#include "btl_portals_recv.h"
#include "btl_portals_frag.h"
mca_btl_portals_module_t mca_btl_portals_module = {
{
@ -71,30 +74,44 @@ mca_btl_portals_add_procs(struct mca_btl_base_module_t* btl,
ompi_bitmap_t* reachable)
{
int ret;
struct ompi_proc_t *local_proc = ompi_proc_local();
struct ompi_proc_t *curr_proc;
ptl_process_id_t *portals_procs;
struct ompi_proc_t *curr_proc = NULL;
ptl_process_id_t *portals_procs = NULL;
size_t i;
unsigned long distance;
struct mca_btl_portals_module_t *mybtl =
struct mca_btl_portals_module_t *ptl_btl =
(struct mca_btl_portals_module_t*) btl;
bool need_recv_setup = false;
/* make sure our environment is fully initialized. At end of this
call, we have a working network handle on our module and
portals_procs will have the portals process identifier for each
proc (ordered, in theory) */
ret = mca_btl_portals_add_procs_compat(mybtl, nprocs, procs,
ret = mca_btl_portals_add_procs_compat(ptl_btl, nprocs, procs,
&portals_procs);
if (OMPI_SUCCESS != ret) return ret;
OPAL_THREAD_LOCK(&ptl_btl->portals_lock);
if (0 == opal_list_get_size(&ptl_btl->portals_endpoint_list)) {
need_recv_setup = true;
}
/* loop through all procs, setting our reachable flag */
for (i= 0; i < nprocs ; ++i) {
curr_proc = procs[i];
/* BWB - do we want to send to self? No for now */
if (curr_proc == local_proc) continue;
/* make sure we can reach the process */
ret = PtlNIDist(mybtl->portals_ni_h,
peers[i] = OBJ_NEW(mca_btl_portals_endpoint_t);
peers[i]->endpoint_btl = ptl_btl;
peers[i]->endpoint_proc = curr_proc;
peers[i]->endpoint_ptl_id = portals_procs[i];
opal_list_append(&ptl_btl->portals_endpoint_list,
(opal_list_item_t*) peers[i]);
/* make sure we can reach the process - this is supposed to be
a cheap-ish operation */
ret = PtlNIDist(ptl_btl->portals_ni_h,
portals_procs[i],
&distance);
if (ret != PTL_OK) {
@ -103,14 +120,21 @@ mca_btl_portals_add_procs(struct mca_btl_base_module_t* btl,
continue;
}
/* set the peer as a pointer to the address */
peers[i] = (struct mca_btl_base_endpoint_t*) &(portals_procs[i]);
/* and here we can reach */
ompi_bitmap_set_bit(reachable, i);
}
return OMPI_SUCCESS;
if (NULL != portals_procs) free(portals_procs);
OPAL_THREAD_UNLOCK(&ptl_btl->portals_lock);
if (need_recv_setup) {
ret = mca_btl_portals_recv_enable(ptl_btl);
} else {
ret = OMPI_SUCCESS;
}
return ret;
}
@ -120,12 +144,95 @@ mca_btl_portals_del_procs(struct mca_btl_base_module_t *btl,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t **peers)
{
/* yeah, I have no idea what to do here */
mca_btl_portals_module_t *ptl_btl =
(mca_btl_portals_module_t*) btl;
size_t i = 0;
int ret = OMPI_SUCCESS;
bool need_recv_shutdown = false;
OPAL_THREAD_LOCK(&ptl_btl->portals_lock);
for (i = 0 ; i < nprocs ; ++i) {
opal_list_remove_item(&ptl_btl->portals_endpoint_list,
(opal_list_item_t*) peers[i]);
OBJ_RELEASE(peers[i]);
}
if (0 == opal_list_get_size(&ptl_btl->portals_endpoint_list)) {
need_recv_shutdown = true;
}
OPAL_THREAD_UNLOCK(&ptl_btl->portals_lock);
if (need_recv_shutdown) {
ret = mca_btl_portals_recv_disable(ptl_btl);
} else {
ret = OMPI_SUCCESS;
}
return ret;
}
/*
 * Remember the receive callback associated with a tag.
 *
 * btl     - base module pointer; treated as a mca_btl_portals_module_t
 * tag     - index into the module's portals_reg table (not range-checked
 *           here)
 * cbfunc  - callback to store for the tag
 * cbdata  - opaque pointer stored alongside the callback
 *
 * Only records the pair in the module; always returns OMPI_SUCCESS.
 */
int
mca_btl_portals_register(struct mca_btl_base_module_t* btl,
                         mca_btl_base_tag_t tag,
                         mca_btl_base_module_recv_cb_fn_t cbfunc,
                         void* cbdata)
{
    mca_btl_portals_module_t* module = (mca_btl_portals_module_t*) btl;

    module->portals_reg[tag].cbdata = cbdata;
    module->portals_reg[tag].cbfunc = cbfunc;

    return OMPI_SUCCESS;
}
mca_btl_base_descriptor_t*
mca_btl_portals_alloc(struct mca_btl_base_module_t* btl,
size_t size)
{
mca_btl_portals_module_t* portals_btl = (mca_btl_portals_module_t*) btl;
mca_btl_portals_frag_t* frag;
int rc;
if (size <= btl->btl_eager_limit) {
MCA_BTL_PORTALS_FRAG_ALLOC_EAGER(portals_btl, frag, rc);
frag->segment.seg_len =
size <= btl->btl_eager_limit ?
size : btl->btl_eager_limit ;
} else {
MCA_BTL_PORTALS_FRAG_ALLOC_MAX(portals_btl, frag, rc);
frag->segment.seg_len =
size <= btl->btl_max_send_size ?
size : btl->btl_max_send_size ;
}
frag->base.des_flags = 0;
return (mca_btl_base_descriptor_t*) frag;
}
/*
 * Give a descriptor back to the pool it belongs to.
 *
 * The pool is chosen from frag->size: 0 selects the user pool, a size
 * equal to btl_eager_limit the eager pool, and a size equal to
 * btl_max_send_size the max-send pool.
 *
 * Returns OMPI_SUCCESS once the fragment has been returned, or
 * OMPI_ERR_BAD_PARAM if the size matches none of the three.
 */
int
mca_btl_portals_free(struct mca_btl_base_module_t* btl,
                     mca_btl_base_descriptor_t* des)
{
    mca_btl_portals_frag_t* frag = (mca_btl_portals_frag_t*) des;

    if (0 == frag->size) {
        MCA_BTL_PORTALS_FRAG_RETURN_USER(btl, frag);
        return OMPI_SUCCESS;
    }

    if (btl->btl_eager_limit == frag->size) {
        MCA_BTL_PORTALS_FRAG_RETURN_EAGER(btl, frag);
        return OMPI_SUCCESS;
    }

    if (btl->btl_max_send_size == frag->size) {
        MCA_BTL_PORTALS_FRAG_RETURN_MAX(btl, frag);
        return OMPI_SUCCESS;
    }

    /* size does not correspond to any known pool */
    return OMPI_ERR_BAD_PARAM;
}
int
mca_btl_portals_finalize(struct mca_btl_base_module_t *btl_base)

Просмотреть файл

@ -28,6 +28,8 @@
#include "class/ompi_free_list.h"
#include "class/ompi_proc_table.h"
#include "btl_portals_endpoint.h"
/*
* Portals BTL component.
@ -86,6 +88,9 @@ struct mca_btl_portals_module_t {
/* registered callbacks */
mca_btl_base_recv_reg_t portals_reg[MCA_BTL_TAG_MAX];
/* list of connected procs */
opal_list_t portals_endpoint_list;
ompi_free_list_t portals_frag_eager;
ompi_free_list_t portals_frag_max;
ompi_free_list_t portals_frag_user;
@ -107,6 +112,9 @@ struct mca_btl_portals_module_t {
/* number of dropped messages */
ptl_sr_value_t portals_sr_dropped;
/* lock for accessing module */
opal_mutex_t portals_lock;
};
typedef struct mca_btl_portals_module_t mca_btl_portals_module_t;
@ -131,9 +139,11 @@ int mca_btl_portals_component_progress(void);
*/
int mca_btl_portals_init(mca_btl_portals_component_t *comp);
/* 4th argument is a ptl_peers array, as that's what we'll get back
from many of the access functions... */
int mca_btl_portals_add_procs_compat(mca_btl_portals_module_t* btl,
size_t nprocs, struct ompi_proc_t **procs,
ptl_process_id_t **portals_procs);
ptl_process_id_t **ptl_peers);
/*
* Module configuration functions (btl_portals.c)

Просмотреть файл

@ -146,7 +146,7 @@ mca_btl_portals_component_open(void)
opal_output_open(&portals_output_stream);
/* fill default module state */
mca_btl_portals_module.super.btl_flags = MCA_BTL_FLAGS_RDMA;
mca_btl_portals_module.super.btl_flags = MCA_BTL_FLAGS_SEND;
for (i = 0 ; i < MCA_BTL_PORTALS_EQ_SIZE ; ++i) {
mca_btl_portals_module.portals_eq_sizes[i] = 0;

Просмотреть файл

@ -0,0 +1,45 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "portals_config.h"
#include "btl_portals.h"
#include "btl_portals_endpoint.h"
/*
 * Constructor for an endpoint object: start with no owning BTL, no
 * associated proc, and a zeroed Portals process id (nid and pid).
 */
static void
mca_btl_portals_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
{
    /* Portals address is unknown until the caller fills it in */
    endpoint->endpoint_ptl_id.pid = 0;
    endpoint->endpoint_ptl_id.nid = 0;

    endpoint->endpoint_proc = NULL;
    endpoint->endpoint_btl = NULL;
}
/*
 * Class descriptor for the Portals endpoint.
 *
 * The parent class must be opal_list_item_t: that is the type of the
 * struct's first member ("super"), and endpoints are appended to the
 * module's portals_endpoint_list.  Naming opal_object_t as the parent
 * would skip the list-item constructor and leave the list linkage
 * uninitialized.
 *
 * No destructor is registered; the constructor acquires no resources.
 */
OBJ_CLASS_INSTANCE(
    mca_btl_portals_endpoint_t,
    opal_list_item_t,
    mca_btl_portals_endpoint_construct,
    NULL);

Просмотреть файл

@ -0,0 +1,58 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_PORTALS_ENDPOINT_H
#define MCA_BTL_PORTALS_ENDPOINT_H
#include "opal/class/opal_list.h"
#include "mca/btl/btl.h"
#include "btl_portals.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * Portals BTL definition of the endpoint type: one record describing a
 * peer process as seen by a particular BTL module.
 *
 * The first member is an opal_list_item_t so that an instance can be
 * linked onto an opal_list_t.
 *
 * NOTE(review): the earlier wording of this comment said connections to
 * the endpoint are established dynamically on an as-needed basis; no
 * field here tracks connection state - confirm whether that still
 * applies to Portals.
 */
struct mca_btl_base_endpoint_t {
/** list linkage; must stay the first member */
opal_list_item_t super;
/** BTL instance that created this endpoint */
struct mca_btl_portals_module_t* endpoint_btl;
/** proc structure corresponding to this endpoint */
struct ompi_proc_t *endpoint_proc;
/** Portals address (nid / pid) of the endpoint */
ptl_process_id_t endpoint_ptl_id;
};
/* name the struct without the "struct" keyword */
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
/* Portals-specific alias for the same type */
typedef mca_btl_base_endpoint_t mca_btl_portals_endpoint_t;
OBJ_CLASS_DECLARATION(mca_btl_portals_endpoint_t);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_BTL_PORTALS_ENDPOINT_H */

Просмотреть файл

@ -21,31 +21,45 @@
#include "btl_portals_frag.h"
static void mca_btl_portals_frag_common_constructor(mca_btl_portals_frag_t* frag)
static void
mca_btl_portals_frag_common_constructor(mca_btl_portals_frag_t* frag)
{
mca_btl_portals_frag_common_constructor(frag);
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
frag->base.des_dst = NULL;
frag->base.des_dst = 0;
frag->base.des_dst_cnt = 0;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->segment.seg_addr.pval = frag + sizeof(mca_btl_portals_frag_t);
frag->segment.seg_len = frag->size;
frag->segment.seg_key.key64 = 0;
}
static void mca_btl_portals_frag_eager_constructor(mca_btl_portals_frag_t* frag)
static void
mca_btl_portals_frag_eager_constructor(mca_btl_portals_frag_t* frag)
{
frag->size = mca_btl_portals_module.super.btl_eager_limit;
mca_btl_portals_frag_common_constructor(frag);
}
static void mca_btl_portals_frag_max_constructor(mca_btl_portals_frag_t* frag)
static void
mca_btl_portals_frag_max_constructor(mca_btl_portals_frag_t* frag)
{
frag->size = mca_btl_portals_module.super.btl_max_send_size;
mca_btl_portals_frag_common_constructor(frag);
}
static void mca_btl_portals_frag_user_constructor(mca_btl_portals_frag_t* frag)
static void
mca_btl_portals_frag_user_constructor(mca_btl_portals_frag_t* frag)
{
frag->base.des_flags = 0;
frag->base.des_dst = 0;
frag->base.des_dst_cnt = 0;
frag->base.des_src = 0;
frag->base.des_src_cnt = 0;
frag->size = 0;
mca_btl_portals_frag_common_constructor(frag);
}

Просмотреть файл

@ -30,8 +30,9 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_portals_frag_t);
struct mca_btl_portals_frag_t {
mca_btl_base_descriptor_t base;
mca_btl_base_segment_t segment;
struct mca_btl_portals_module_t *btl;
struct mca_btl_base_endpoint_t *endpoint;
mca_btl_base_header_t *hdr;
mca_btl_base_header_t hdr;
size_t size;
};
typedef struct mca_btl_portals_frag_t mca_btl_portals_frag_t;

Просмотреть файл

@ -24,6 +24,20 @@
#include "btl_portals_recv.h"
/*
 * Enable receive processing for a module.
 *
 * Currently a placeholder: performs no work and reports OMPI_SUCCESS.
 */
int
mca_btl_portals_recv_enable(mca_btl_portals_module_t *module)
{
    (void) module;  /* nothing to set up yet */

    return OMPI_SUCCESS;
}
/*
 * Disable receive processing for a module.
 *
 * Currently a placeholder: performs no work and reports OMPI_SUCCESS.
 */
int
mca_btl_portals_recv_disable(mca_btl_portals_module_t *module)
{
    (void) module;  /* nothing to tear down yet */

    return OMPI_SUCCESS;
}
int
mca_btl_portals_process_recv(mca_btl_portals_module_t *module,
ptl_event_t *ev)

Просмотреть файл

@ -17,6 +17,10 @@
#ifndef MCA_BTL_PORTALS_RECV_H
#define MCA_BTL_PORTALS_RECV_H
int mca_btl_portals_recv_enable(mca_btl_portals_module_t *module);
int mca_btl_portals_recv_disable(mca_btl_portals_module_t *module);
int mca_btl_portals_process_recv(mca_btl_portals_module_t *module,
ptl_event_t *ev);

Просмотреть файл

@ -28,5 +28,24 @@ int
mca_btl_portals_process_send(mca_btl_portals_module_t *module,
ptl_event_t *ev)
{
opal_output_verbose(99, mca_btl_portals_component.portals_output,
"process_send");
return OMPI_SUCCESS;
}
/*
 * BTL send entry point.
 *
 * Stamps the fragment behind the descriptor with the owning module, the
 * destination endpoint and the tag, then hands it to
 * mca_btl_portals_send_frag() and returns that function's result.
 */
int
mca_btl_portals_send(struct mca_btl_base_module_t* btl,
                     struct mca_btl_base_endpoint_t* endpoint,
                     struct mca_btl_base_descriptor_t* descriptor,
                     mca_btl_base_tag_t tag)
{
    mca_btl_portals_frag_t *send_frag = (mca_btl_portals_frag_t*) descriptor;

    send_frag->btl = (mca_btl_portals_module_t*) btl;
    send_frag->hdr.tag = tag;
    send_frag->endpoint = endpoint;

    return mca_btl_portals_send_frag(send_frag);
}

Просмотреть файл

@ -17,8 +17,56 @@
#ifndef MCA_BTL_PORTALS_SEND_H
#define MCA_BTL_PORTALS_SEND_H
#include "btl_portals_frag.h"
int mca_btl_portals_process_send(mca_btl_portals_module_t *module,
ptl_event_t *ev);
/*
 * Start the Portals transfer for one fragment.
 *
 * Describes the fragment's segment with a memory descriptor, binds it
 * on the module's network interface, and issues a PtlPut (with an ack
 * request) to the fragment's endpoint.  The fragment pointer is stored
 * in the descriptor's user_ptr and the tag travels as the put's
 * hdr_data.
 *
 * frag->btl, frag->endpoint and frag->hdr.tag must be set by the caller.
 *
 * Returns OMPI_SUCCESS if the put was issued, OMPI_ERROR if either the
 * bind or the put failed (the descriptor is unlinked again when the put
 * fails).
 */
static inline int
mca_btl_portals_send_frag(mca_btl_portals_frag_t *frag)
{
    ptl_md_t send_md;
    ptl_handle_md_t send_md_h;
    int rc;

    /* describe the buffer to send */
    send_md.start = frag->segment.seg_addr.pval;
    send_md.length = frag->segment.seg_len;
    send_md.threshold = PTL_MD_THRESH_INF;  /* unlink based on protocol */
    send_md.max_size = 0;
    send_md.options = 0;                    /* BWB - can we optimize? */
    send_md.user_ptr = frag;                /* keep a pointer to ourselves */
    send_md.eq_handle =
        frag->btl->portals_eq_handles[MCA_BTL_PORTALS_EQ_SEND];

    /* make a free-floater */
    rc = PtlMDBind(frag->btl->portals_ni_h,
                   send_md,
                   PTL_UNLINK,
                   &send_md_h);
    if (PTL_OK != rc) {
        opal_output(mca_btl_portals_component.portals_output,
                    "PtlMDBind failed with error %d", rc);
        return OMPI_ERROR;
    }

    rc = PtlPut(send_md_h,
                PTL_ACK_REQ,
                frag->endpoint->endpoint_ptl_id,
                BTL_PORTALS_SEND_TABLE_ID,
                0,                /* ac_index */
                0,                /* match bits */
                0,                /* remote offset - not used */
                frag->hdr.tag);   /* hdr_data - tag */
    if (PTL_OK == rc) {
        return OMPI_SUCCESS;
    }

    /* the put never started: report it and drop the descriptor */
    opal_output(mca_btl_portals_component.portals_output,
                "PtlPut failed with error %d", rc);
    PtlMDUnlink(send_md_h);

    return OMPI_ERROR;
}
#endif /* MCA_BTL_PORTALS_SEND_H */

Просмотреть файл

@ -30,35 +30,6 @@
* BWB - README - BWB - README - BWB - README - BWB - README - BWB */
/*
 * Stub: prints a trace line to stdout, stores nothing, and returns
 * OMPI_SUCCESS.
 */
int
mca_btl_portals_register(struct mca_btl_base_module_t* btl,
                         mca_btl_base_tag_t tag,
                         mca_btl_base_module_recv_cb_fn_t cbfunc,
                         void* cbdata)
{
    fputs("btl register\n", stdout);

    return OMPI_SUCCESS;
}
/*
 * Stub: prints the requested size to stdout and returns NULL (no
 * descriptor is allocated).
 */
mca_btl_base_descriptor_t*
mca_btl_portals_alloc(struct mca_btl_base_module_t* btl,
                      size_t size)
{
    /* size is a size_t; "%d" expects an int, so convert explicitly and
       use a matching conversion */
    printf("btl alloc: %lu\n", (unsigned long) size);

    return NULL;
}
/*
 * Stub: prints a trace line to stdout, releases nothing, and returns
 * OMPI_ERR_NOT_IMPLEMENTED.
 */
int
mca_btl_portals_free(struct mca_btl_base_module_t* btl,
                     mca_btl_base_descriptor_t* des)
{
    fputs("btl free\n", stdout);

    return OMPI_ERR_NOT_IMPLEMENTED;
}
mca_btl_base_descriptor_t*
mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* peer,
@ -85,17 +56,6 @@ mca_btl_portals_prepare_dst(struct mca_btl_base_module_t* btl,
}
/*
 * Stub: prints a trace line to stdout, sends nothing, and returns
 * OMPI_ERR_NOT_IMPLEMENTED.
 */
int
mca_btl_portals_send(struct mca_btl_base_module_t* btl,
                     struct mca_btl_base_endpoint_t* btl_peer,
                     struct mca_btl_base_descriptor_t* descriptor,
                     mca_btl_base_tag_t tag)
{
    fputs("btl send\n", stdout);

    return OMPI_ERR_NOT_IMPLEMENTED;
}
int
mca_btl_portals_put(struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,