1
1
- removed unused state
- starting to add support for btl failover

This commit was SVN r9431.
Этот коммит содержится в:
Tim Woodall 2006-03-27 22:48:12 +00:00
родитель c724e4c804
Коммит c1bf71b1be
15 изменённых файлов: 158 добавлений и 43 удалений

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
@ -27,14 +27,11 @@
static void mca_bml_base_endpoint_construct(mca_bml_base_endpoint_t* ep)
{
ep->btl_proc = NULL;
ep->btl_rdma_offset = 0;
ep->btl_max_send_size = 0;
ep->btl_flags = 0;
ep->btl_rdma_size = 0;
ep->btl_rdma_align = 0;
OBJ_CONSTRUCT(&ep->btl_lock, opal_mutex_t);
OBJ_CONSTRUCT(&ep->btl_eager, mca_bml_base_btl_array_t);
OBJ_CONSTRUCT(&ep->btl_send, mca_bml_base_btl_array_t);
OBJ_CONSTRUCT(&ep->btl_rdma, mca_bml_base_btl_array_t);
@ -43,7 +40,6 @@ static void mca_bml_base_endpoint_construct(mca_bml_base_endpoint_t* ep)
static void mca_bml_base_endpoint_destruct(mca_bml_base_endpoint_t* ep)
{
OBJ_DESTRUCT(&ep->btl_lock);
OBJ_DESTRUCT(&ep->btl_eager);
OBJ_DESTRUCT(&ep->btl_send);
OBJ_DESTRUCT(&ep->btl_rdma);

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*

Просмотреть файл

@ -1,17 +0,0 @@
mca_bml_base_module_t* mca_bml_base_module_lookup(const char* name)
{
opal_list_item_t* item;
for (item = opal_list_get_first(&mca_bml_base_modules);
item != opal_list_get_end(&mca_bml_base_modules);
item = opal_list_get_next(item)) {
mca_mpool_base_selected_module_t *mli =
(mca_mpool_base_selected_module_t *) item;
if(0 == strcmp(mli->mpool_component->mpool_version.mca_component_name,
name)) {
return mli->mpool_module;
}
}
return NULL;
}

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
@ -211,9 +211,6 @@ static inline mca_bml_base_btl_t* mca_bml_base_btl_array_find(
*/
struct mca_bml_base_endpoint_t {
mca_pml_proc_t super;
struct ompi_proc_t *btl_proc; /**< back-pointer to ompi_proc_t */
opal_mutex_t btl_lock; /**< lock to protect against concurrent access */
int btl_flags; /**< prefered method of accessing this peer */
size_t btl_rdma_offset; /**< max of min rdma size for available rmda btls */
size_t btl_max_send_size; /**< min of max send size for available send btls */
size_t btl_rdma_size; /**< max of min rdma size for available rmda btls */
@ -451,7 +448,6 @@ typedef int (*mca_bml_base_module_finalize_fn_t)( void );
/**
* PML->BML notification of change in the process list.
*
* @param bml (IN) BML module
* @param nprocs (IN) Number of processes
* @param procs (IN) Set of processes
* @param endpoint (OUT) Set of (optional) mca_bml_base_endpoint_t structures by BML.
@ -482,10 +478,8 @@ typedef int (*mca_bml_base_module_add_procs_fn_t)(
/**
* Notification of change to the process list.
*
* @param bml (IN) BML module
* @param nprocs (IN) Number of processes
* @param proc (IN) Set of processes
* @param peer (IN) Set of peer addressing information.
* @return Status indicating if cleanup was successful
*
* When the process list changes, the PML notifies the BML of the
@ -497,6 +491,28 @@ typedef int (*mca_bml_base_module_del_procs_fn_t)(
struct ompi_proc_t** procs
);
/**
* Notification of change to the btl list.
*
* @param bml (IN) BTL module
* @return Status indicating if cleanup was successful
*
* On recovery of a btl, add it to the set of forwarding
* entries used by the BML.
*/
typedef int (*mca_bml_base_module_add_btl_fn_t)( struct mca_btl_base_module_t* );
/**
* Notification of change to the btl list.
*
* @param bml (IN) BTL module
* @return Status indicating if cleanup was successful
*
* On failure of a btl, remove it from the set of forwarding
* entries used by the BML.
*/
typedef int (*mca_bml_base_module_del_btl_fn_t)( struct mca_btl_base_module_t* );
/**
* Callback function that is called asynchronously on receipt
* of data by the transport layer.
@ -544,6 +560,8 @@ struct mca_bml_base_module_t {
/* BML function table */
mca_bml_base_module_add_procs_fn_t bml_add_procs;
mca_bml_base_module_del_procs_fn_t bml_del_procs;
mca_bml_base_module_add_btl_fn_t bml_add_btl;
mca_bml_base_module_del_btl_fn_t bml_del_btl;
mca_bml_base_module_register_fn_t bml_register;
mca_bml_base_module_finalize_fn_t bml_finalize;

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
@ -43,6 +43,8 @@ mca_bml_r2_module_t mca_bml_r2 = {
0, /* max rdma size */
mca_bml_r2_add_procs,
mca_bml_r2_del_procs,
mca_bml_r2_add_btl,
mca_bml_r2_del_btl,
mca_bml_r2_register,
mca_bml_r2_finalize,
mca_bml_r2_progress
@ -259,7 +261,7 @@ int mca_bml_r2_add_procs(
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules);
bml_endpoint->btl_max_send_size = -1;
bml_endpoint->btl_rdma_size = -1;
bml_endpoint->btl_proc = proc;
bml_endpoint->super.proc_ompi = proc;
proc->proc_pml = (struct mca_pml_proc_t*) bml_endpoint;
}
@ -302,7 +304,6 @@ int mca_bml_r2_add_procs(
bml_btl->btl_put = btl->btl_put;
bml_btl->btl_get = btl->btl_get;
bml_btl->btl_flags = btl->btl_flags;
bml_btl->btl_mpool = btl->btl_mpool;
}
}
@ -483,6 +484,118 @@ int mca_bml_r2_finalize( void ) {
return OMPI_SUCCESS; /* TODO */
}
/*
* (1) Remove btl from each bml endpoint
* (2) Remove btl from the global list
*/
int mca_bml_r2_del_btl(mca_btl_base_module_t* btl)
{
ompi_proc_t** procs;
size_t p, num_procs;
procs = ompi_proc_all(&num_procs);
if(NULL == procs)
return OMPI_SUCCESS;
for(p=0; p<num_procs; p++) {
double total_bandwidth = 0;
ompi_proc_t* proc = procs[p];
mca_bml_base_endpoint_t* ep_old = (mca_bml_base_endpoint_t*)proc->proc_pml;
mca_bml_base_endpoint_t* ep_new = OBJ_NEW(mca_bml_base_endpoint_t);
size_t b;
/* initialize */
ep_new->super.proc_ompi = proc;
ep_new->btl_max_send_size = -1;
ep_new->btl_rdma_size = -1;
ep_new->btl_rdma_align = 0;
ep_new->btl_rdma_offset = 0;
/* build new eager list */
mca_bml_base_btl_array_reserve(&ep_new->btl_eager, mca_bml_base_btl_array_get_size(&ep_old->btl_eager));
for(b=0; b< mca_bml_base_btl_array_get_size(&ep_old->btl_eager); b++) {
mca_bml_base_btl_t* bml_btl_old = mca_bml_base_btl_array_get_index(&ep_old->btl_eager, b);
if(bml_btl_old->btl != btl) {
mca_bml_base_btl_t* bml_btl_new = mca_bml_base_btl_array_insert(&ep_new->btl_eager);
*bml_btl_new = *bml_btl_old;
}
}
/* build new send list */
total_bandwidth = 0;
mca_bml_base_btl_array_reserve(&ep_new->btl_send, mca_bml_base_btl_array_get_size(&ep_old->btl_send));
for(b=0; b< mca_bml_base_btl_array_get_size(&ep_old->btl_send); b++) {
mca_bml_base_btl_t* bml_btl_old = mca_bml_base_btl_array_get_index(&ep_old->btl_send, b);
if(bml_btl_old->btl != btl) {
mca_bml_base_btl_t* bml_btl_new = mca_bml_base_btl_array_insert(&ep_new->btl_send);
*bml_btl_new = *bml_btl_old;
total_bandwidth += bml_btl_new->btl->btl_bandwidth;
if (bml_btl_new->btl_max_send_size < ep_new->btl_max_send_size) {
ep_new->btl_max_send_size = bml_btl_new->btl->btl_max_send_size;
}
}
}
/* compute weighting factor for this btl */
for(b=0; b< mca_bml_base_btl_array_get_size(&ep_new->btl_send); b++) {
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&ep_old->btl_send, b);
if(bml_btl->btl->btl_bandwidth > 0) {
bml_btl->btl_weight = bml_btl->btl->btl_bandwidth / total_bandwidth;
} else {
bml_btl->btl_weight = 1.0 / mca_bml_base_btl_array_get_size(&ep_new->btl_send);
}
}
/* build new rdma list */
total_bandwidth = 0;
mca_bml_base_btl_array_reserve(&ep_new->btl_rdma, mca_bml_base_btl_array_get_size(&ep_old->btl_rdma));
for(b=0; b< mca_bml_base_btl_array_get_size(&ep_old->btl_rdma); b++) {
mca_bml_base_btl_t* bml_btl_old = mca_bml_base_btl_array_get_index(&ep_old->btl_rdma, b);
if(bml_btl_old->btl != btl) {
mca_bml_base_btl_t* bml_btl_new = mca_bml_base_btl_array_insert(&ep_new->btl_rdma);
*bml_btl_new = *bml_btl_old;
/* update aggregate endpoint info */
total_bandwidth += bml_btl_new->btl->btl_bandwidth;
if (ep_new->btl_rdma_offset < bml_btl_new->btl_min_rdma_size) {
ep_new->btl_rdma_offset = bml_btl_new->btl_min_rdma_size;
}
if (ep_new->btl_rdma_size > bml_btl_new->btl_max_rdma_size) {
ep_new->btl_rdma_size = bml_btl_new->btl_max_rdma_size;
ep_new->btl_rdma_align = bml_base_log2(ep_new->btl_rdma_size);
}
}
}
/* compute weighting factor for this btl */
for(b=0; b< mca_bml_base_btl_array_get_size(&ep_new->btl_rdma); b++) {
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&ep_old->btl_rdma, b);
if(bml_btl->btl->btl_bandwidth > 0) {
bml_btl->btl_weight = bml_btl->btl->btl_bandwidth / total_bandwidth;
} else {
bml_btl->btl_weight = 1.0 / mca_bml_base_btl_array_get_size(&ep_new->btl_rdma);
}
}
/* save on proc */
proc->proc_pml = (mca_pml_proc_t*)ep_new;
/* DONT delete old endpoint structure as it may still be in use... */
}
return OMPI_SUCCESS;
}
int mca_bml_r2_add_btl(mca_btl_base_module_t* btl)
{
return ORTE_ERR_NOT_IMPLEMENTED;
}
/*
* Register callback w/ all active btls
*/
int mca_bml_r2_register(
mca_btl_base_tag_t tag,
mca_bml_base_module_recv_cb_fn_t cbfunc,
@ -510,3 +623,4 @@ int mca_bml_r2_component_fini(void)
return OMPI_SUCCESS;
}

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
@ -83,6 +83,10 @@ int mca_bml_r2_del_procs(
struct ompi_proc_t** procs
);
int mca_bml_r2_add_btl( mca_btl_base_module_t* btl );
int mca_bml_r2_del_btl( mca_btl_base_module_t* btl );
int mca_bml_r2_register(
mca_btl_base_tag_t tag,
mca_bml_base_module_recv_cb_fn_t cbfunc,

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*

Просмотреть файл

@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*