/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2013 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006      Voltaire. All rights reserved.
 * Copyright (c) 2007      Mellanox Technologies. All rights reserved.
 * Copyright (c) 2009      IBM Corporation.  All rights reserved.
 * Copyright (c) 2013      NVIDIA Corporation.  All rights reserved.
 * Copyright (c) 2013      Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

/**
 * @file
 * Description of the Registration Cache framework
 */
|
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
#include "opal_config.h"
|
2007-01-08 22:03:16 +00:00
|
|
|
|
2006-08-15 18:40:08 +00:00
|
|
|
#include "opal/mca/mca.h"
|
|
|
|
#include "rcache_vma_tree.h"
|
|
|
|
|
|
|
|
/* List-item wrapper holding a single registration pointer (->reg); no
 * constructor/destructor needed beyond the opal_list_item_t base. */
OBJ_CLASS_INSTANCE(mca_rcache_vma_reg_list_item_t, opal_list_item_t, NULL, NULL);
|
|
|
|
|
|
|
|
/*
 * opal_object_t constructor for mca_rcache_vma_t: sets up the two
 * registration lists (active and deferred-delete) owned by a VMA node.
 */
static void mca_rcache_vma_construct(opal_object_t *object)
{
    mca_rcache_vma_t *vma_node = (mca_rcache_vma_t *) object;

    OBJ_CONSTRUCT(&vma_node->reg_list, opal_list_t);
    OBJ_CONSTRUCT(&vma_node->reg_delete_list, opal_list_t);
}
|
|
|
|
|
|
|
|
/*
 * opal_object_t destructor for mca_rcache_vma_t: tears down the two
 * registration lists created by the constructor.
 */
static void mca_rcache_vma_destruct(opal_object_t *object)
{
    mca_rcache_vma_t *vma_node = (mca_rcache_vma_t *) object;

    OBJ_DESTRUCT(&vma_node->reg_list);
    OBJ_DESTRUCT(&vma_node->reg_delete_list);
}
|
|
|
|
|
2015-01-14 10:21:24 -07:00
|
|
|
/* A VMA tree node: an opal_list_item_t covering an address range
 * [start, end], carrying the registration lists set up above. */
OBJ_CLASS_INSTANCE(mca_rcache_vma_t, opal_list_item_t,
                   mca_rcache_vma_construct, mca_rcache_vma_destruct);
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Function for the red black tree to compare 2 keys
|
|
|
|
*
|
|
|
|
* @param key1 a pointer to the 1st key
|
|
|
|
* @param key2 a pointer to the second key
|
|
|
|
*
|
|
|
|
* @retval -1 if key1 is below key2
|
|
|
|
* @retval 1 if key 1 is above key2
|
|
|
|
* @retval 0 if the keys are the same
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int mca_rcache_vma_tree_node_compare(void *key1, void *key2)
|
|
|
|
{
|
|
|
|
mca_rcache_vma_t *vma1 = (mca_rcache_vma_t*)key1,
|
|
|
|
*vma2 = (mca_rcache_vma_t*)key2;
|
|
|
|
|
|
|
|
if(vma1->start < vma2->start)
|
|
|
|
return -1;
|
|
|
|
if(vma1->start > vma2->start)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mca_rcache_vma_tree_node_compare_search(void *key1, void *key2)
|
|
|
|
{
|
|
|
|
mca_rcache_vma_t *vma = (mca_rcache_vma_t*)key2;
|
|
|
|
uintptr_t addr = (uintptr_t)key1;
|
|
|
|
|
|
|
|
if(vma->end < addr)
|
|
|
|
return 1;
|
|
|
|
if(vma->start <= addr)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mca_rcache_vma_tree_node_compare_closest(void *key1, void *key2)
|
|
|
|
{
|
|
|
|
mca_rcache_vma_t *vma = (mca_rcache_vma_t*)key2, *prev_vma;
|
|
|
|
uintptr_t addr = (uintptr_t)key1;
|
|
|
|
|
|
|
|
if(vma->end < addr)
|
|
|
|
return 1;
|
|
|
|
if(vma->start <= addr)
|
|
|
|
return 0;
|
|
|
|
prev_vma = (mca_rcache_vma_t *)opal_list_get_prev(&vma->super.super);
|
2006-11-01 13:44:47 +00:00
|
|
|
if(prev_vma == (mca_rcache_vma_t *)opal_list_get_end(&vma->rcache->vma_list)
|
|
|
|
|| prev_vma->end < addr)
|
2006-08-15 18:40:08 +00:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2006-11-01 13:44:47 +00:00
|
|
|
static inline mca_rcache_vma_t *mca_rcache_vma_new(
|
|
|
|
mca_rcache_vma_module_t *vma_rcache, uintptr_t start, uintptr_t end)
|
2006-08-15 18:40:08 +00:00
|
|
|
{
|
|
|
|
mca_rcache_vma_t *vma = OBJ_NEW(mca_rcache_vma_t);
|
|
|
|
|
|
|
|
if(NULL == vma)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
vma->start = start;
|
|
|
|
vma->end = end;
|
2006-11-01 13:44:47 +00:00
|
|
|
vma->rcache = vma_rcache;
|
2006-08-15 18:40:08 +00:00
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
(void)opal_rb_tree_insert(&vma_rcache->rb_tree, vma, vma);
|
2006-08-15 18:40:08 +00:00
|
|
|
|
|
|
|
return vma;
|
|
|
|
}
|
|
|
|
|
2010-02-22 11:19:15 +00:00
|
|
|
/*
 * Release a VMA node: drop every item on both the active and the
 * deferred-delete registration lists, then release the node itself.
 */
void mca_rcache_vma_destroy(mca_rcache_vma_t *vma)
{
    opal_list_item_t *entry;

    while (NULL != (entry = opal_list_remove_first(&vma->reg_list))) {
        OBJ_RELEASE(entry);
    }

    while (NULL != (entry = opal_list_remove_first(&vma->reg_delete_list))) {
        OBJ_RELEASE(entry);
    }

    OBJ_RELEASE(vma);
}
|
|
|
|
|
|
|
|
static inline int mca_rcache_vma_compare_regs(
|
|
|
|
mca_mpool_base_registration_t *reg1,
|
|
|
|
mca_mpool_base_registration_t *reg2)
|
|
|
|
{
|
2006-12-17 12:26:41 +00:00
|
|
|
/* persisten registration are on top */
|
|
|
|
if((reg1->flags & MCA_MPOOL_FLAGS_PERSIST) &&
|
|
|
|
!(reg2->flags & MCA_MPOOL_FLAGS_PERSIST))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if(!(reg1->flags & MCA_MPOOL_FLAGS_PERSIST) &&
|
|
|
|
(reg2->flags & MCA_MPOOL_FLAGS_PERSIST))
|
|
|
|
return -1;
|
|
|
|
|
2006-08-15 18:40:08 +00:00
|
|
|
if (reg1->bound != reg2->bound)
|
2006-12-17 12:26:41 +00:00
|
|
|
return (int)(reg1->bound - reg2->bound);
|
2006-08-15 18:40:08 +00:00
|
|
|
|
|
|
|
/* tie breaker */
|
|
|
|
return (int)((uintptr_t)reg1 - (uintptr_t)reg2);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int mca_rcache_vma_add_reg(mca_rcache_vma_t *vma,
|
|
|
|
mca_mpool_base_registration_t *reg)
|
|
|
|
{
|
|
|
|
opal_list_item_t *i;
|
|
|
|
mca_rcache_vma_reg_list_item_t *item, *entry;
|
|
|
|
|
|
|
|
entry = OBJ_NEW(mca_rcache_vma_reg_list_item_t);
|
|
|
|
|
|
|
|
if(!entry)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
entry->reg = reg;
|
|
|
|
|
|
|
|
for(i = opal_list_get_first(&vma->reg_list);
|
|
|
|
i != opal_list_get_end(&vma->reg_list);
|
|
|
|
i = opal_list_get_next(i)) {
|
|
|
|
item = (mca_rcache_vma_reg_list_item_t*)i;
|
|
|
|
|
|
|
|
if(mca_rcache_vma_compare_regs(item->reg, reg) > 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
opal_list_insert_pos(&vma->reg_list, &item->super, &entry->super);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
opal_list_append(&vma->reg_list, &entry->super);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void mca_rcache_vma_remove_reg(mca_rcache_vma_t *vma,
|
|
|
|
mca_mpool_base_registration_t *reg)
|
|
|
|
{
|
|
|
|
opal_list_item_t *i;
|
|
|
|
mca_rcache_vma_reg_list_item_t *item;
|
|
|
|
|
|
|
|
for(i = opal_list_get_first(&vma->reg_list);
|
|
|
|
i != opal_list_get_end(&vma->reg_list);
|
|
|
|
i = opal_list_get_next(i)) {
|
|
|
|
item = (mca_rcache_vma_reg_list_item_t*)i;
|
|
|
|
|
|
|
|
if(item->reg == reg) {
|
|
|
|
opal_list_remove_item(&vma->reg_list, &item->super);
|
2010-02-23 11:31:58 +00:00
|
|
|
opal_list_append(&vma->reg_delete_list, &item->super);
|
2006-08-15 18:40:08 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Shallow-copy the registration list of "from" into "to": new list
 * items are allocated, but the registrations they reference are shared.
 *
 * @return OPAL_SUCCESS on success, OPAL_ERR_OUT_OF_RESOURCE when a list
 *         item cannot be allocated (items copied so far remain on "to").
 */
static inline int mca_rcache_vma_copy_reg_list(mca_rcache_vma_t *to,
        mca_rcache_vma_t *from)
{
    opal_list_item_t *i;
    mca_rcache_vma_reg_list_item_t *item_f, *item_t;

    for (i = opal_list_get_first(&from->reg_list);
         i != opal_list_get_end(&from->reg_list);
         i = opal_list_get_next(i)) {
        item_f = (mca_rcache_vma_reg_list_item_t *) i;

        item_t = OBJ_NEW(mca_rcache_vma_reg_list_item_t);
        if (NULL == item_t) {
            /* bug fix: previously "return 0" (== OPAL_SUCCESS), which
             * reported success on allocation failure */
            return OPAL_ERR_OUT_OF_RESOURCE;
        }

        item_t->reg = item_f->reg;
        opal_list_append(&to->reg_list, &item_t->super);
    }

    return OPAL_SUCCESS;
}
|
|
|
|
|
|
|
|
/* returns 1 iff two lists contain the same entries */
|
|
|
|
static inline int mca_rcache_vma_compare_reg_lists(mca_rcache_vma_t *vma1,
|
|
|
|
mca_rcache_vma_t *vma2)
|
|
|
|
{
|
|
|
|
mca_rcache_vma_reg_list_item_t *i1, *i2;
|
|
|
|
|
|
|
|
if (!vma1 || !vma2)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if(opal_list_get_size(&vma1->reg_list) !=
|
|
|
|
opal_list_get_size(&vma2->reg_list))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
i1 = (mca_rcache_vma_reg_list_item_t*)opal_list_get_first(&vma1->reg_list);
|
|
|
|
i2 = (mca_rcache_vma_reg_list_item_t*)opal_list_get_first(&vma2->reg_list);
|
|
|
|
|
|
|
|
do {
|
|
|
|
if(i1 == (mca_rcache_vma_reg_list_item_t*)opal_list_get_end(&vma1->reg_list) ||
|
|
|
|
i2 == (mca_rcache_vma_reg_list_item_t*)opal_list_get_end(&vma2->reg_list))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if(i1->reg != i2->reg)
|
|
|
|
break;
|
|
|
|
|
|
|
|
i1 = (mca_rcache_vma_reg_list_item_t*)opal_list_get_next(i1);
|
|
|
|
i2 = (mca_rcache_vma_reg_list_item_t*)opal_list_get_next(i2);
|
|
|
|
} while(1);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Initialize the per-module VMA tree state: the rb-tree index, the
 * ordered VMA list, the deferred-delete list and the cached-bytes
 * counter.  Returns the status of opal_rb_tree_init().
 */
int mca_rcache_vma_tree_init(mca_rcache_vma_module_t *rcache)
{
    OBJ_CONSTRUCT(&rcache->rb_tree, opal_rb_tree_t);
    OBJ_CONSTRUCT(&rcache->vma_list, opal_list_t);
    OBJ_CONSTRUCT(&rcache->vma_delete_list, opal_list_t);

    rcache->reg_cur_cache_size = 0;

    return opal_rb_tree_init(&rcache->rb_tree,
                             mca_rcache_vma_tree_node_compare);
}
|
|
|
|
|
2013-10-23 15:51:44 +00:00
|
|
|
/*
 * Tear down the per-module VMA tree state.  The rb-tree is
 * re-initialized (emptied) before the containers are destructed,
 * mirroring mca_rcache_vma_tree_init() in reverse.
 */
void mca_rcache_vma_tree_finalize(mca_rcache_vma_module_t *rcache)
{
    opal_rb_tree_init(&rcache->rb_tree, mca_rcache_vma_tree_node_compare);

    OBJ_DESTRUCT(&rcache->vma_delete_list);
    OBJ_DESTRUCT(&rcache->vma_list);
    OBJ_DESTRUCT(&rcache->rb_tree);
}
|
|
|
|
|
2006-08-15 18:40:08 +00:00
|
|
|
mca_mpool_base_registration_t *mca_rcache_vma_tree_find(
|
|
|
|
mca_rcache_vma_module_t* vma_rcache, unsigned char *base,
|
|
|
|
unsigned char *bound)
|
|
|
|
{
|
|
|
|
mca_rcache_vma_t *vma;
|
|
|
|
mca_rcache_vma_reg_list_item_t *item;
|
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
vma = (mca_rcache_vma_t*)opal_rb_tree_find_with(&vma_rcache->rb_tree, base,
|
2006-08-15 18:40:08 +00:00
|
|
|
mca_rcache_vma_tree_node_compare_search);
|
|
|
|
|
|
|
|
if(!vma)
|
|
|
|
return NULL;
|
|
|
|
|
2008-01-08 08:55:42 +00:00
|
|
|
for(item = (mca_rcache_vma_reg_list_item_t*)
|
|
|
|
opal_list_get_first(&vma->reg_list);
|
|
|
|
item != (mca_rcache_vma_reg_list_item_t*)
|
|
|
|
opal_list_get_end(&vma->reg_list);
|
|
|
|
item = (mca_rcache_vma_reg_list_item_t*)
|
|
|
|
opal_list_get_next(item)) {
|
|
|
|
if(item->reg->flags & MCA_MPOOL_FLAGS_INVALID)
|
|
|
|
continue;
|
2006-12-17 12:26:41 +00:00
|
|
|
if(item->reg->bound >= bound)
|
|
|
|
return item->reg;
|
|
|
|
if(!(item->reg->flags & MCA_MPOOL_FLAGS_PERSIST))
|
|
|
|
break;
|
2008-01-08 08:55:42 +00:00
|
|
|
}
|
2006-08-15 18:40:08 +00:00
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2008-01-08 08:55:42 +00:00
|
|
|
/* true when registration p already appears among the first cnt entries
 * of regs */
static inline bool is_reg_in_array(mca_mpool_base_registration_t **regs,
        int cnt, mca_mpool_base_registration_t *p)
{
    int idx;

    for (idx = 0; idx < cnt; idx++) {
        if (p == regs[idx]) {
            return true;
        }
    }

    return false;
}
|
|
|
|
|
|
|
|
int mca_rcache_vma_tree_find_all(
|
|
|
|
mca_rcache_vma_module_t *vma_rcache, unsigned char *base,
|
2008-01-08 08:55:42 +00:00
|
|
|
unsigned char *bound, mca_mpool_base_registration_t **regs,
|
|
|
|
int reg_cnt)
|
2006-12-17 12:26:41 +00:00
|
|
|
{
|
|
|
|
int cnt = 0;
|
|
|
|
|
|
|
|
if(opal_list_get_size(&vma_rcache->vma_list) == 0)
|
|
|
|
return cnt;
|
|
|
|
|
|
|
|
do {
|
|
|
|
mca_rcache_vma_t *vma;
|
|
|
|
opal_list_item_t *item;
|
2008-01-08 08:55:42 +00:00
|
|
|
vma = (mca_rcache_vma_t*)
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
opal_rb_tree_find_with(&vma_rcache->rb_tree, base,
|
2006-12-17 12:26:41 +00:00
|
|
|
mca_rcache_vma_tree_node_compare_closest);
|
|
|
|
|
|
|
|
if(NULL == vma) {
|
|
|
|
/* base is bigger than any registered memory */
|
2007-07-23 16:18:36 +00:00
|
|
|
break;
|
2006-12-17 12:26:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if(base < (unsigned char*)vma->start) {
|
|
|
|
base = (unsigned char*)vma->start;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
for(item = opal_list_get_first(&vma->reg_list);
|
|
|
|
item != opal_list_get_end(&vma->reg_list);
|
|
|
|
item = opal_list_get_next(item)) {
|
|
|
|
mca_rcache_vma_reg_list_item_t *vma_item;
|
|
|
|
vma_item = (mca_rcache_vma_reg_list_item_t*)item;
|
2008-01-08 08:55:42 +00:00
|
|
|
if((vma_item->reg->flags & MCA_MPOOL_FLAGS_INVALID) ||
|
|
|
|
is_reg_in_array(regs, cnt, vma_item->reg)) {
|
2006-12-17 12:26:41 +00:00
|
|
|
continue;
|
|
|
|
}
|
2008-01-08 08:55:42 +00:00
|
|
|
regs[cnt++] = vma_item->reg;
|
|
|
|
if(cnt == reg_cnt)
|
|
|
|
return cnt; /* no space left in the provided array */
|
2006-12-17 12:26:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
base = (unsigned char *)vma->end + 1;
|
|
|
|
} while(bound >= base);
|
|
|
|
|
|
|
|
return cnt;
|
|
|
|
}
|
|
|
|
|
2006-08-15 18:40:08 +00:00
|
|
|
static inline int mca_rcache_vma_can_insert(
|
2006-12-17 12:26:41 +00:00
|
|
|
mca_rcache_vma_module_t *vma_rcache, size_t nbytes, size_t limit)
|
2006-08-15 18:40:08 +00:00
|
|
|
{
|
2006-12-17 12:26:41 +00:00
|
|
|
if(0 == limit)
|
2006-08-15 18:40:08 +00:00
|
|
|
return 1;
|
|
|
|
|
2006-12-17 12:26:41 +00:00
|
|
|
if(vma_rcache->reg_cur_cache_size + nbytes <= limit)
|
2006-08-15 18:40:08 +00:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void mca_rcache_vma_update_byte_count(
|
|
|
|
mca_rcache_vma_module_t* vma_rcache,
|
|
|
|
size_t nbytes)
|
|
|
|
{
|
2006-12-17 12:26:41 +00:00
|
|
|
vma_rcache->reg_cur_cache_size += nbytes;
|
2006-08-15 18:40:08 +00:00
|
|
|
}
|
|
|
|
|
2006-12-17 12:26:41 +00:00
|
|
|
int mca_rcache_vma_tree_insert(mca_rcache_vma_module_t* vma_rcache,
|
|
|
|
mca_mpool_base_registration_t* reg, size_t limit)
|
2006-08-15 18:40:08 +00:00
|
|
|
{
|
|
|
|
mca_rcache_vma_t *i;
|
|
|
|
uintptr_t begin = (uintptr_t)reg->base, end = (uintptr_t)reg->bound;
|
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
i = (mca_rcache_vma_t*)opal_rb_tree_find_with(&vma_rcache->rb_tree,
|
2006-08-15 18:40:08 +00:00
|
|
|
(void*)begin, mca_rcache_vma_tree_node_compare_closest);
|
|
|
|
|
|
|
|
if(!i)
|
|
|
|
i = (mca_rcache_vma_t*)opal_list_get_end(&vma_rcache->vma_list);
|
|
|
|
|
|
|
|
while (begin <= end) {
|
|
|
|
mca_rcache_vma_t *vma;
|
|
|
|
|
|
|
|
if((mca_rcache_vma_t*)opal_list_get_end(&vma_rcache->vma_list) == i) {
|
|
|
|
vma = NULL;
|
2006-12-17 12:26:41 +00:00
|
|
|
if(mca_rcache_vma_can_insert(vma_rcache, end - begin + 1, limit))
|
2006-11-01 13:44:47 +00:00
|
|
|
vma = mca_rcache_vma_new(vma_rcache, begin, end);
|
2006-08-15 18:40:08 +00:00
|
|
|
|
|
|
|
if(!vma)
|
|
|
|
goto remove;
|
|
|
|
|
|
|
|
mca_rcache_vma_update_byte_count(vma_rcache, end - begin + 1);
|
|
|
|
|
|
|
|
opal_list_append(&vma_rcache->vma_list, &vma->super);
|
|
|
|
begin = vma->end + 1;
|
|
|
|
mca_rcache_vma_add_reg(vma, reg);
|
|
|
|
} else if(i->start > begin) {
|
|
|
|
uintptr_t tend = (i->start <= end)?(i->start - 1):end;
|
|
|
|
vma = NULL;
|
2006-12-17 12:26:41 +00:00
|
|
|
if(mca_rcache_vma_can_insert(vma_rcache, tend - begin + 1, limit))
|
2006-11-01 13:44:47 +00:00
|
|
|
vma = mca_rcache_vma_new(vma_rcache, begin, tend);
|
2006-08-15 18:40:08 +00:00
|
|
|
|
|
|
|
if(!vma)
|
|
|
|
goto remove;
|
|
|
|
|
|
|
|
mca_rcache_vma_update_byte_count(vma_rcache, tend - begin + 1);
|
|
|
|
|
|
|
|
/* insert before */
|
|
|
|
opal_list_insert_pos(&vma_rcache->vma_list, &i->super, &vma->super);
|
|
|
|
i = vma;
|
|
|
|
begin = vma->end + 1;
|
|
|
|
mca_rcache_vma_add_reg(vma, reg);
|
|
|
|
} else if(i->start == begin) {
|
|
|
|
if (i->end > end) {
|
2006-11-01 13:44:47 +00:00
|
|
|
vma = mca_rcache_vma_new(vma_rcache, end+1, i->end);
|
2006-08-15 18:40:08 +00:00
|
|
|
if(!vma)
|
|
|
|
goto remove;
|
|
|
|
|
|
|
|
i->end = end;
|
|
|
|
|
|
|
|
mca_rcache_vma_copy_reg_list(vma, i);
|
|
|
|
|
|
|
|
/* add after */
|
|
|
|
opal_list_insert_pos(&vma_rcache->vma_list,
|
|
|
|
opal_list_get_next(&i->super),
|
|
|
|
&vma->super);
|
|
|
|
mca_rcache_vma_add_reg(i, reg);
|
|
|
|
begin = end + 1;
|
|
|
|
} else {
|
|
|
|
mca_rcache_vma_add_reg(i, reg);
|
|
|
|
begin = i->end + 1;
|
|
|
|
}
|
|
|
|
} else {
|
2006-11-01 13:44:47 +00:00
|
|
|
vma = mca_rcache_vma_new(vma_rcache, begin, i->end);
|
2006-08-15 18:40:08 +00:00
|
|
|
|
|
|
|
if(!vma)
|
|
|
|
goto remove;
|
|
|
|
|
|
|
|
i->end = begin - 1;
|
|
|
|
|
|
|
|
mca_rcache_vma_copy_reg_list(vma, i);
|
|
|
|
|
|
|
|
/* add after */
|
|
|
|
opal_list_insert_pos(&vma_rcache->vma_list,
|
|
|
|
opal_list_get_next(&i->super),
|
|
|
|
&vma->super);
|
|
|
|
}
|
|
|
|
|
|
|
|
i = (mca_rcache_vma_t*)opal_list_get_next(&i->super);
|
|
|
|
}
|
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_SUCCESS;
|
2006-08-15 18:40:08 +00:00
|
|
|
|
|
|
|
remove:
|
|
|
|
mca_rcache_vma_tree_delete(vma_rcache, reg);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
|
2006-08-15 18:40:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
 * Function to remove previously registered memory from the tree without freeing it
|
|
|
|
*
|
|
|
|
* @param base pointer to the memory to free
|
|
|
|
*
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
* @retval OPAL_SUCCESS
|
|
|
|
* @retval OPAL_ERR_BAD_PARAM if the passed base pointer was invalid
|
2006-08-15 18:40:08 +00:00
|
|
|
*/
|
|
|
|
int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache,
|
|
|
|
mca_mpool_base_registration_t* reg)
|
|
|
|
{
|
|
|
|
mca_rcache_vma_t *vma;
|
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
vma = (mca_rcache_vma_t*)opal_rb_tree_find_with(&vma_rcache->rb_tree, reg->base,
|
2006-08-15 18:40:08 +00:00
|
|
|
mca_rcache_vma_tree_node_compare_search);
|
|
|
|
|
|
|
|
if(!vma)
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_ERROR;
|
2006-08-15 18:40:08 +00:00
|
|
|
|
|
|
|
while(vma != (mca_rcache_vma_t*)opal_list_get_end(&vma_rcache->vma_list)
|
2006-10-18 10:55:01 +00:00
|
|
|
&& vma->start <= (uintptr_t)reg->bound) {
|
2006-08-15 18:40:08 +00:00
|
|
|
mca_rcache_vma_remove_reg(vma, reg);
|
|
|
|
|
|
|
|
if(opal_list_is_empty(&vma->reg_list)) {
|
|
|
|
mca_rcache_vma_t *next = (mca_rcache_vma_t*)opal_list_get_next(&vma->super);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
opal_rb_tree_delete(&vma_rcache->rb_tree, vma);
|
2006-08-15 18:40:08 +00:00
|
|
|
mca_rcache_vma_update_byte_count(vma_rcache,
|
|
|
|
vma->start - vma->end - 1);
|
|
|
|
opal_list_remove_item(&vma_rcache->vma_list, &vma->super);
|
2010-02-22 11:19:15 +00:00
|
|
|
opal_list_append(&vma_rcache->vma_delete_list, &vma->super);
|
2006-08-15 18:40:08 +00:00
|
|
|
vma = next;
|
|
|
|
} else {
|
|
|
|
int merged;
|
|
|
|
|
|
|
|
do {
|
|
|
|
mca_rcache_vma_t *prev = NULL, *next = NULL;
|
|
|
|
if(opal_list_get_begin(&vma_rcache->vma_list) !=
|
|
|
|
opal_list_get_prev(vma))
|
|
|
|
prev = (mca_rcache_vma_t*)opal_list_get_prev(vma);
|
|
|
|
merged = 0;
|
|
|
|
|
|
|
|
if(prev && vma->start == prev->end + 1 &&
|
|
|
|
mca_rcache_vma_compare_reg_lists(vma, prev)) {
|
|
|
|
prev->end = vma->end;
|
|
|
|
opal_list_remove_item(&vma_rcache->vma_list, &vma->super);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
opal_rb_tree_delete(&vma_rcache->rb_tree, vma);
|
2010-02-22 11:19:15 +00:00
|
|
|
opal_list_append(&vma_rcache->vma_delete_list, &vma->super);
|
2006-08-15 18:40:08 +00:00
|
|
|
vma = prev;
|
|
|
|
merged = 1;
|
|
|
|
}
|
|
|
|
if(opal_list_get_end(&vma_rcache->vma_list) !=
|
|
|
|
opal_list_get_next(vma))
|
|
|
|
next = (mca_rcache_vma_t*)opal_list_get_next(vma);
|
|
|
|
|
|
|
|
if(next && vma->end + 1 == next->start &&
|
|
|
|
mca_rcache_vma_compare_reg_lists(vma, next)) {
|
|
|
|
vma->end = next->end;
|
|
|
|
opal_list_remove_item(&vma_rcache->vma_list, &next->super);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
opal_rb_tree_delete(&vma_rcache->rb_tree, next);
|
2010-02-22 11:19:15 +00:00
|
|
|
opal_list_append(&vma_rcache->vma_delete_list, &next->super);
|
2006-08-15 18:40:08 +00:00
|
|
|
merged = 1;
|
|
|
|
}
|
|
|
|
} while(merged);
|
|
|
|
vma = (mca_rcache_vma_t*)opal_list_get_next(vma);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2013-09-17 15:40:32 +00:00
|
|
|
|
|
|
|
/* Dump out rcache entries within a range of memory. Useful for debugging. */
|
|
|
|
void mca_rcache_vma_tree_dump_range(mca_rcache_vma_module_t *vma_rcache,
                                    unsigned char *base, size_t size, char *msg)
{
    unsigned char * bound = base + size -1;
    mca_mpool_base_registration_t *reg;

    if (NULL == msg) {
        msg = "";
    }

    opal_output(0, "Dumping rcache entries: %s", msg);

    if(opal_list_is_empty(&vma_rcache->vma_list)) {
        opal_output(0, " rcache is empty");
        return;
    }

    /* Walk the requested range [base, base+size-1], vma by vma. */
    do {
        mca_rcache_vma_t *vma;
        opal_list_item_t *item;
        /* Find the vma closest to (at or after) the current base. */
        vma = (mca_rcache_vma_t*)
            opal_rb_tree_find_with(&vma_rcache->rb_tree, base,
                    mca_rcache_vma_tree_node_compare_closest);

        if(NULL == vma) {
            /* base is bigger than any registered memory */
            break;
        }

        if(base < (unsigned char*)vma->start) {
            /* Gap before the next vma: skip forward to its start. */
            base = (unsigned char*)vma->start;
            continue;
        }

        /* Cast to unsigned long: vma->start/end are uintptr_t, and passing
         * a uintptr_t expression to %lu is undefined on platforms where
         * unsigned long is narrower (e.g. LLP64). */
        opal_output(0, " vma: base=%p, bound=%p, size=%lu, number of registrations=%d",
                    (void *)vma->start, (void *)vma->end,
                    (unsigned long)(vma->end - vma->start + 1),
                    (int)opal_list_get_size(&vma->reg_list));
        for(item = opal_list_get_first(&vma->reg_list);
            item != opal_list_get_end(&vma->reg_list);
            item = opal_list_get_next(item)) {
            mca_rcache_vma_reg_list_item_t *vma_item;
            vma_item = (mca_rcache_vma_reg_list_item_t*)item;
            reg = vma_item->reg;
            opal_output(0, " reg: base=%p, bound=%p, alloc_base=%p, ref_count=%d, flags=0x%x",
                        reg->base, reg->bound, reg->alloc_base, reg->ref_count, reg->flags);
        }
        /* Continue just past this vma. */
        base = (unsigned char *)vma->end + 1;
    } while(bound >= base);
}
|