2011-12-09 21:24:07 +00:00
|
|
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
|
|
/*
|
2015-01-05 16:03:15 -07:00
|
|
|
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
2011-12-09 21:24:07 +00:00
|
|
|
* reserved.
|
|
|
|
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
2015-06-18 09:53:20 -07:00
|
|
|
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
2011-12-09 21:24:07 +00:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have expressed interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK, with support from Sandia, developed a version of Open MPI where the entire communication infrastructure has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to complete this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
#include "opal_config.h"
|
2011-12-09 21:24:07 +00:00
|
|
|
|
|
|
|
#include "btl_ugni.h"
|
|
|
|
#include "btl_ugni_frag.h"
|
2012-05-31 20:02:41 +00:00
|
|
|
#include "btl_ugni_smsg.h"
|
2012-02-10 00:47:29 +00:00
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
#include "opal/include/opal/align.h"
|
2015-06-18 09:53:20 -07:00
|
|
|
#include "opal/mca/pmix/pmix.h"
|
2014-08-29 22:53:35 +00:00
|
|
|
|
2014-08-11 16:15:39 +00:00
|
|
|
#define INITIAL_GNI_EPS 10000
|
2013-11-18 04:58:37 +00:00
|
|
|
|
2012-02-10 00:47:29 +00:00
|
|
|
static int
|
|
|
|
mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module);
|
2012-04-19 21:51:44 +00:00
|
|
|
static void
|
|
|
|
mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_procs);
|
2013-11-18 04:58:37 +00:00
|
|
|
static int mca_btl_ugni_smsg_setup (int nprocs);
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2015-05-04 16:11:34 -06:00
|
|
|
int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
|
|
|
|
struct opal_proc_t **procs,
|
|
|
|
struct mca_btl_base_endpoint_t **peers,
|
|
|
|
opal_bitmap_t *reachable) {
|
2011-12-09 21:24:07 +00:00
|
|
|
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
|
|
|
int rc;
|
2014-12-24 10:41:58 -07:00
|
|
|
void *mmap_start_addr;
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
if (false == ugni_module->initialized) {
|
2014-08-08 18:02:46 +00:00
|
|
|
|
2014-08-11 16:15:39 +00:00
|
|
|
/* TODO: need to think of something more elegant than this max array */
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2014-08-11 16:15:39 +00:00
|
|
|
rc = opal_pointer_array_init (&ugni_module->endpoints, INITIAL_GNI_EPS, 1 << 24, 512);
|
2013-11-18 04:58:37 +00:00
|
|
|
if (OPAL_SUCCESS != rc) {
|
|
|
|
BTL_ERROR(("error inializing the endpoint array. rc = %d", rc));
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* NTH: might want to vary this size based off the universe size (if
|
|
|
|
* one exists). the table is only used for connection lookup and
|
|
|
|
* endpoint removal. */
|
|
|
|
rc = opal_hash_table_init (&ugni_module->id_to_endpoint, 512);
|
|
|
|
if (OPAL_SUCCESS != rc) {
|
|
|
|
BTL_ERROR(("error initializing the endpoint hash. rc = %d", rc));
|
|
|
|
return rc;
|
2011-12-09 21:24:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-04 16:11:34 -06:00
|
|
|
for (size_t i = 0 ; i < nprocs ; ++i) {
|
2015-01-05 16:03:15 -07:00
|
|
|
struct opal_proc_t *opal_proc = procs[i];
|
|
|
|
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2015-05-04 16:11:34 -06:00
|
|
|
/* check for an existing endpoint */
|
|
|
|
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
|
|
|
|
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) (peers + i))) {
|
|
|
|
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
|
|
|
|
ugni_module->nlocal_procs++;
|
2013-11-18 04:58:37 +00:00
|
|
|
|
2015-05-04 16:11:34 -06:00
|
|
|
/* ugni is allowed on local processes to provide support for network
|
|
|
|
* atomic operations */
|
|
|
|
}
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2015-05-04 16:11:34 -06:00
|
|
|
/* Create and Init endpoints */
|
|
|
|
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc);
|
|
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
|
|
|
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
|
|
|
|
BTL_ERROR(("btl/ugni error initializing endpoint"));
|
|
|
|
return rc;
|
|
|
|
}
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2015-05-04 16:11:34 -06:00
|
|
|
/* go ahead and connect the local endpoint for RDMA/CQ write */
|
|
|
|
if (opal_proc == opal_proc_local_get ()) {
|
|
|
|
ugni_module->local_ep = peers[i];
|
|
|
|
}
|
2015-01-05 16:03:15 -07:00
|
|
|
|
2015-05-04 16:11:34 -06:00
|
|
|
/* Add this endpoint to the pointer array. */
|
|
|
|
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i]));
|
|
|
|
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]);
|
2013-11-18 04:58:37 +00:00
|
|
|
|
2015-05-04 16:11:34 -06:00
|
|
|
++ugni_module->endpoint_count;
|
|
|
|
}
|
|
|
|
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
|
|
|
|
|
|
|
|
/* Set the reachable bit if necessary */
|
|
|
|
if (reachable) {
|
|
|
|
rc = opal_bitmap_set_bit (reachable, i);
|
|
|
|
}
|
2011-12-09 21:24:07 +00:00
|
|
|
}
|
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs);
|
2012-04-19 21:51:44 +00:00
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
if (false == ugni_module->initialized) {
|
2014-10-08 10:10:19 -06:00
|
|
|
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
2012-05-07 17:22:35 +00:00
|
|
|
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
|
|
|
|
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->rdma_local_cq);
|
2014-10-08 10:10:19 -06:00
|
|
|
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
2012-05-07 17:22:35 +00:00
|
|
|
if (GNI_RC_SUCCESS != rc) {
|
|
|
|
BTL_ERROR(("error creating local BTE/FMA CQ"));
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return opal_common_rc_ugni_to_opal (rc);
|
2012-05-07 17:22:35 +00:00
|
|
|
}
|
|
|
|
|
2014-10-08 10:10:19 -06:00
|
|
|
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
2012-05-07 17:22:35 +00:00
|
|
|
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
|
|
|
|
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->smsg_local_cq);
|
2014-10-08 10:10:19 -06:00
|
|
|
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
2012-05-07 17:22:35 +00:00
|
|
|
if (GNI_RC_SUCCESS != rc) {
|
|
|
|
BTL_ERROR(("error creating local SMSG CQ"));
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return opal_common_rc_ugni_to_opal (rc);
|
2012-05-07 17:22:35 +00:00
|
|
|
}
|
|
|
|
|
2014-10-08 10:10:19 -06:00
|
|
|
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
2012-05-31 20:02:41 +00:00
|
|
|
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.remote_cq_size,
|
2012-05-07 17:22:35 +00:00
|
|
|
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->smsg_remote_cq);
|
2014-10-08 10:10:19 -06:00
|
|
|
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
2012-05-07 17:22:35 +00:00
|
|
|
if (GNI_RC_SUCCESS != rc) {
|
|
|
|
BTL_ERROR(("error creating remote SMSG CQ"));
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return opal_common_rc_ugni_to_opal (rc);
|
2012-05-07 17:22:35 +00:00
|
|
|
}
|
|
|
|
|
2014-12-24 10:41:58 -07:00
|
|
|
if (mca_btl_ugni_component.progress_thread_enabled) {
|
2014-12-04 16:18:16 -07:00
|
|
|
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
|
|
|
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
|
|
|
|
0, GNI_CQ_BLOCKING, NULL, NULL, &ugni_module->rdma_local_irq_cq);
|
|
|
|
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
|
|
|
if (GNI_RC_SUCCESS != rc) {
|
|
|
|
BTL_ERROR(("error creating local BTE/FMA CQ"));
|
|
|
|
return opal_common_rc_ugni_to_opal (rc);
|
|
|
|
}
|
|
|
|
|
|
|
|
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
|
|
|
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.remote_cq_size,
|
|
|
|
0, GNI_CQ_BLOCKING, NULL, NULL, &ugni_module->smsg_remote_irq_cq);
|
|
|
|
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
|
|
|
if (GNI_RC_SUCCESS != rc) {
|
|
|
|
BTL_ERROR(("error creating remote SMSG CQ"));
|
|
|
|
return opal_common_rc_ugni_to_opal (rc);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-04-19 21:51:44 +00:00
|
|
|
rc = mca_btl_ugni_setup_mpools (ugni_module);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
2012-04-19 21:51:44 +00:00
|
|
|
BTL_ERROR(("btl/ugni error setting up mpools/free lists"));
|
|
|
|
return rc;
|
|
|
|
}
|
2012-05-31 20:02:41 +00:00
|
|
|
|
|
|
|
rc = mca_btl_ugni_smsg_init (ugni_module);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
2012-05-31 20:02:41 +00:00
|
|
|
BTL_ERROR(("btl/ugni error initializing SMSG"));
|
|
|
|
return rc;
|
|
|
|
}
|
2012-04-19 21:51:44 +00:00
|
|
|
|
2014-12-24 10:41:58 -07:00
|
|
|
/*
|
|
|
|
* If progress thread enabled, registered a page of memory
|
|
|
|
* with the smsg_remote_irq_cq. This memory handle is passed
|
|
|
|
* to ranks which want to communicate with this rank. A rank which
|
|
|
|
* posts a GNI_PostCqWrite targeting this memory handle generates
|
|
|
|
* an IRQ at the target node, which ultimately causes the progress
|
|
|
|
* thread in the target rank to become schedulable.
|
|
|
|
*/
|
|
|
|
if (mca_btl_ugni_component.progress_thread_enabled) {
|
|
|
|
mmap_start_addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
|
|
|
if (NULL == mmap_start_addr) {
|
|
|
|
BTL_ERROR(("btl/ugni mmap returned error"));
|
|
|
|
return OPAL_ERR_OUT_OF_RESOURCE;
|
2014-12-04 16:18:16 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
2014-12-09 22:56:04 -08:00
|
|
|
rc = GNI_MemRegister(ugni_module->device->dev_handle,
|
2014-12-24 10:41:58 -07:00
|
|
|
(unsigned long)mmap_start_addr,
|
2014-12-04 16:18:16 -07:00
|
|
|
4096,
|
|
|
|
ugni_module->smsg_remote_irq_cq,
|
|
|
|
GNI_MEM_READWRITE,
|
|
|
|
-1,
|
|
|
|
&ugni_module->device->smsg_irq_mhndl);
|
|
|
|
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
|
|
|
|
|
|
|
mca_btl_ugni_spawn_progress_thread(btl);
|
|
|
|
}
|
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
ugni_module->initialized = true;
|
|
|
|
}
|
2011-12-09 21:24:07 +00:00
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_SUCCESS;
|
2011-12-09 21:24:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
size_t nprocs, struct opal_proc_t **procs,
|
2011-12-09 21:24:07 +00:00
|
|
|
struct mca_btl_base_endpoint_t **peers) {
|
|
|
|
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
|
|
|
size_t i;
|
2013-11-18 04:58:37 +00:00
|
|
|
int rc;
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
while (ugni_module->active_send_count) {
|
|
|
|
/* ensure all sends are complete before removing and procs */
|
|
|
|
rc = mca_btl_ugni_progress_local_smsg (ugni_module);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
if (OPAL_SUCCESS != rc) {
|
2013-11-18 04:58:37 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2011-12-09 21:24:07 +00:00
|
|
|
|
|
|
|
for (i = 0 ; i < nprocs ; ++i) {
|
2015-01-05 16:03:15 -07:00
|
|
|
struct opal_proc_t *opal_proc = procs[i];
|
|
|
|
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
|
2013-11-18 04:58:37 +00:00
|
|
|
mca_btl_base_endpoint_t *ep = NULL;
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
/* lookup this proc in the hash table */
|
|
|
|
(void) opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2014-05-13 21:22:33 +00:00
|
|
|
BTL_VERBOSE(("deleting endpoint with proc id 0x%" PRIx64 ", ptr: %p", proc_id, (void *) ep));
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
if (NULL != ep) {
|
|
|
|
mca_btl_ugni_release_ep (ep);
|
|
|
|
--ugni_module->endpoint_count;
|
|
|
|
}
|
2011-12-09 21:24:07 +00:00
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
/* remote the endpoint from the hash table */
|
|
|
|
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, NULL);
|
2011-12-09 21:24:07 +00:00
|
|
|
}
|
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_SUCCESS;
|
2011-12-09 21:24:07 +00:00
|
|
|
}
|
2012-02-10 00:47:29 +00:00
|
|
|
|
2015-05-04 16:11:34 -06:00
|
|
|
|
|
|
|
/**
 * Look up the endpoint for a process, creating it on demand.
 *
 * The id-to-endpoint hash table is searched under endpoint_lock. If no entry
 * exists for the process a new endpoint is initialized and stored in the
 * table.
 *
 * @param[in] module  uGNI BTL module
 * @param[in] proc    process whose endpoint is requested
 *
 * @returns the endpoint for proc, or NULL if a new endpoint could not be
 *          initialized.
 */
struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc)
{
    mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) module;
    uint64_t proc_id = mca_btl_ugni_proc_name_to_id(proc->proc_name);
    /* start from NULL so that no indeterminate pointer can ever be returned */
    mca_btl_base_endpoint_t *ep = NULL;
    int rc;

    OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);

    do {
        rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
        if (OPAL_SUCCESS == rc) {
            break;
        }

        /* Create and Init endpoints */
        rc = mca_btl_ugni_init_ep (ugni_module, &ep, ugni_module, proc);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            BTL_ERROR(("btl/ugni error initializing endpoint"));
            /* whatever init_ep left behind is not a usable endpoint */
            ep = NULL;
            break;
        }

        /* Record the new endpoint in the lookup table. */
        BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) ep));
        opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, ep);
    } while (0);

    OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);

    return ep;
}
|
|
|
|
|
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
static int ugni_reg_mem (void *reg_data, void *base, size_t size,
|
|
|
|
mca_rcache_base_registration_t *reg)
|
2012-02-10 00:47:29 +00:00
|
|
|
{
|
2012-05-31 20:02:41 +00:00
|
|
|
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
|
2012-02-10 00:47:29 +00:00
|
|
|
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg;
|
2015-11-02 12:07:08 -07:00
|
|
|
gni_cq_handle_t cq = NULL;
|
2012-03-15 20:13:32 +00:00
|
|
|
gni_return_t rc;
|
2015-09-29 15:28:00 -06:00
|
|
|
int flags;
|
2012-04-19 21:51:44 +00:00
|
|
|
|
2012-05-31 20:02:41 +00:00
|
|
|
if (ugni_module->reg_count >= ugni_module->reg_max) {
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_ERR_OUT_OF_RESOURCE;
|
2012-05-31 20:02:41 +00:00
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
if (reg->access_flags & (MCA_RCACHE_ACCESS_REMOTE_WRITE | MCA_RCACHE_ACCESS_LOCAL_WRITE |
|
|
|
|
MCA_RCACHE_ACCESS_REMOTE_ATOMIC)) {
|
2015-09-29 15:28:00 -06:00
|
|
|
flags = GNI_MEM_READWRITE;
|
|
|
|
} else {
|
|
|
|
flags = GNI_MEM_READ_ONLY;
|
|
|
|
}
|
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
if (!(reg->flags & MCA_RCACHE_FLAGS_SO_MEM)) {
|
|
|
|
flags |= GNI_MEM_RELAXED_PI_ORDERING;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (reg->flags & MCA_RCACHE_FLAGS_RESV0) {
|
|
|
|
cq = ugni_module->smsg_remote_cq;
|
|
|
|
}
|
2015-09-29 15:28:00 -06:00
|
|
|
|
2014-10-08 10:10:19 -06:00
|
|
|
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
2012-04-19 21:51:44 +00:00
|
|
|
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
|
2015-11-02 12:07:08 -07:00
|
|
|
size, cq, flags, -1, &(ugni_reg->handle.gni_handle));
|
2014-10-08 10:10:19 -06:00
|
|
|
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
|
|
|
|
2012-02-10 00:47:29 +00:00
|
|
|
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_ERR_OUT_OF_RESOURCE;
|
2012-02-10 00:47:29 +00:00
|
|
|
}
|
|
|
|
|
2014-10-08 10:10:19 -06:00
|
|
|
opal_atomic_add_32(&ugni_module->reg_count,1);
|
2012-04-19 21:51:44 +00:00
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_SUCCESS;
|
2012-02-10 00:47:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2015-11-02 12:07:08 -07:00
|
|
|
ugni_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
|
2012-02-10 00:47:29 +00:00
|
|
|
{
|
2012-04-19 21:51:44 +00:00
|
|
|
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
|
2012-02-10 00:47:29 +00:00
|
|
|
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *)reg;
|
2012-03-15 20:13:32 +00:00
|
|
|
gni_return_t rc;
|
2012-02-10 00:47:29 +00:00
|
|
|
|
2014-10-08 10:10:19 -06:00
|
|
|
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
2015-01-05 16:03:15 -07:00
|
|
|
rc = GNI_MemDeregister (ugni_module->device->dev_handle, &ugni_reg->handle.gni_handle);
|
2014-10-08 10:10:19 -06:00
|
|
|
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
2012-02-10 00:47:29 +00:00
|
|
|
if (GNI_RC_SUCCESS != rc) {
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_ERROR;
|
2012-02-10 00:47:29 +00:00
|
|
|
}
|
|
|
|
|
2014-10-08 10:10:19 -06:00
|
|
|
opal_atomic_add_32(&ugni_module->reg_count,-1);
|
2012-04-19 21:51:44 +00:00
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_SUCCESS;
|
2012-02-10 00:47:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
|
|
|
{
|
2015-11-02 12:07:08 -07:00
|
|
|
mca_rcache_udreg_resources_t rcache_resources;
|
2015-06-18 09:53:20 -07:00
|
|
|
unsigned int mbox_increment;
|
|
|
|
uint32_t nprocs, *u32;
|
2015-11-02 12:07:08 -07:00
|
|
|
char *rcache_name;
|
2013-11-18 04:58:37 +00:00
|
|
|
int rc;
|
2012-02-10 00:47:29 +00:00
|
|
|
|
2012-05-07 17:22:55 +00:00
|
|
|
rc = opal_pointer_array_init (&ugni_module->pending_smsg_frags_bb, 0,
|
|
|
|
1 << 30, 32768);
|
2012-03-15 20:13:32 +00:00
|
|
|
if (OPAL_SUCCESS != rc) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
/* determine how many procs are in the job (might want to check universe size here) */
|
2015-06-18 09:53:20 -07:00
|
|
|
u32 = &nprocs;
|
|
|
|
OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_UNIV_SIZE, &OPAL_PROC_MY_NAME,
|
|
|
|
&u32, OPAL_UINT32);
|
|
|
|
if (OPAL_SUCCESS != rc) {
|
|
|
|
/* take a wild conservative guess */
|
2014-08-29 22:53:35 +00:00
|
|
|
nprocs = 512;
|
|
|
|
}
|
2013-11-18 04:58:37 +00:00
|
|
|
|
|
|
|
rc = mca_btl_ugni_smsg_setup (nprocs);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
2013-11-18 04:58:37 +00:00
|
|
|
BTL_ERROR(("error setting up smsg"));
|
|
|
|
return rc;
|
|
|
|
}
|
2012-02-10 00:47:29 +00:00
|
|
|
|
2015-02-19 13:41:41 -07:00
|
|
|
rc = opal_free_list_init (&ugni_module->smsg_frags,
|
|
|
|
sizeof (mca_btl_ugni_smsg_frag_t),
|
|
|
|
opal_cache_line_size, OBJ_CLASS(mca_btl_ugni_smsg_frag_t),
|
|
|
|
mca_btl_ugni_component.ugni_smsg_limit,
|
|
|
|
opal_cache_line_size,
|
|
|
|
mca_btl_ugni_component.ugni_free_list_num,
|
|
|
|
mca_btl_ugni_component.ugni_free_list_max,
|
|
|
|
mca_btl_ugni_component.ugni_free_list_inc,
|
|
|
|
NULL, 0, NULL, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
|
|
|
|
(void *) ugni_module);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
2012-03-15 20:13:32 +00:00
|
|
|
BTL_ERROR(("error creating smsg fragment free list"));
|
2012-02-10 00:47:29 +00:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2015-02-19 13:41:41 -07:00
|
|
|
rc = opal_free_list_init (&ugni_module->rdma_frags,
|
|
|
|
sizeof (mca_btl_ugni_rdma_frag_t), 64,
|
|
|
|
OBJ_CLASS(mca_btl_ugni_rdma_frag_t),
|
|
|
|
0, opal_cache_line_size,
|
|
|
|
mca_btl_ugni_component.ugni_free_list_num,
|
|
|
|
mca_btl_ugni_component.ugni_free_list_max,
|
|
|
|
mca_btl_ugni_component.ugni_free_list_inc,
|
|
|
|
NULL, 0, NULL, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
|
|
|
|
(void *) ugni_module);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
2012-03-15 20:13:32 +00:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2015-02-19 13:41:41 -07:00
|
|
|
rc = opal_free_list_init (&ugni_module->rdma_int_frags,
|
|
|
|
sizeof (mca_btl_ugni_rdma_frag_t), 8,
|
|
|
|
OBJ_CLASS(mca_btl_ugni_rdma_frag_t),
|
|
|
|
0, opal_cache_line_size, 0, -1, 64,
|
|
|
|
NULL, 0, NULL, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
|
|
|
|
(void *) ugni_module);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
2012-02-10 00:47:29 +00:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
ugni_module->super.btl_mpool = mca_mpool_base_module_lookup (mca_btl_ugni_component.mpool_hints);
|
|
|
|
if (NULL == ugni_module->super.btl_mpool) {
|
|
|
|
BTL_ERROR(("could not find mpool matching hints %s", mca_btl_ugni_component.mpool_hints));
|
|
|
|
return OPAL_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
rcache_resources.base.cache_name = "ompi.ugni";
|
|
|
|
rcache_resources.base.reg_data = (void *) ugni_module;
|
|
|
|
rcache_resources.base.sizeof_reg = sizeof (mca_btl_ugni_reg_t);
|
|
|
|
rcache_resources.base.register_mem = ugni_reg_mem;
|
|
|
|
rcache_resources.base.deregister_mem = ugni_dereg_mem;
|
2013-11-18 04:58:37 +00:00
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
if (MCA_BTL_UGNI_RCACHE_UDREG == mca_btl_ugni_component.rcache_type) {
|
2013-11-18 04:58:37 +00:00
|
|
|
/* additional settings for the udreg mpool */
|
|
|
|
/* 4k should be large enough for any Gemini/Ares system */
|
2015-11-02 12:07:08 -07:00
|
|
|
rcache_resources.max_entries = 4096;
|
|
|
|
rcache_resources.use_kernel_cache = true;
|
2013-11-18 04:58:37 +00:00
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
rcache_resources.use_evict_w_unreg = false;
|
|
|
|
rcache_name = "udreg";
|
2013-11-18 04:58:37 +00:00
|
|
|
} else {
|
2015-11-02 12:07:08 -07:00
|
|
|
rcache_name = "grdma";
|
2013-11-18 04:58:37 +00:00
|
|
|
}
|
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
ugni_module->rcache =
|
|
|
|
mca_rcache_base_module_create (rcache_name, ugni_module->device, &rcache_resources.base);
|
2012-02-10 00:47:29 +00:00
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
if (NULL == ugni_module->rcache) {
|
|
|
|
BTL_ERROR(("error creating registration cache"));
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_ERROR;
|
2012-06-20 23:03:59 +00:00
|
|
|
}
|
2012-02-10 00:47:29 +00:00
|
|
|
|
2015-02-19 13:41:41 -07:00
|
|
|
rc = opal_free_list_init (&ugni_module->eager_frags_send,
|
|
|
|
sizeof (mca_btl_ugni_eager_frag_t), 8,
|
|
|
|
OBJ_CLASS(mca_btl_ugni_eager_frag_t),
|
|
|
|
ugni_module->super.btl_eager_limit, 64,
|
|
|
|
mca_btl_ugni_component.ugni_eager_num,
|
|
|
|
mca_btl_ugni_component.ugni_eager_max,
|
|
|
|
mca_btl_ugni_component.ugni_eager_inc,
|
2015-11-02 12:07:08 -07:00
|
|
|
ugni_module->super.btl_mpool, 0, ugni_module->rcache,
|
2015-02-19 13:41:41 -07:00
|
|
|
(opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
|
|
|
|
(void *) ugni_module);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
2012-03-15 20:13:32 +00:00
|
|
|
BTL_ERROR(("error creating eager send fragment free list"));
|
2012-02-10 00:47:29 +00:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2015-02-19 13:41:41 -07:00
|
|
|
rc = opal_free_list_init (&ugni_module->eager_frags_recv,
|
|
|
|
sizeof (mca_btl_ugni_eager_frag_t), 8,
|
|
|
|
OBJ_CLASS(mca_btl_ugni_eager_frag_t),
|
|
|
|
ugni_module->super.btl_eager_limit, 64,
|
|
|
|
mca_btl_ugni_component.ugni_eager_num,
|
|
|
|
mca_btl_ugni_component.ugni_eager_max,
|
|
|
|
mca_btl_ugni_component.ugni_eager_inc,
|
2015-11-02 12:07:08 -07:00
|
|
|
ugni_module->super.btl_mpool, 0, ugni_module->rcache,
|
2015-02-19 13:41:41 -07:00
|
|
|
(opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
|
|
|
|
(void *) ugni_module);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
2012-03-15 20:13:32 +00:00
|
|
|
BTL_ERROR(("error creating eager receive fragment free list"));
|
2012-02-10 00:47:29 +00:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
if (0 == mca_btl_ugni_component.mbox_increment) {
|
|
|
|
/* limit mailbox allocations to either 12.5% of available registrations
|
|
|
|
or 2MiB per allocation */
|
2014-10-08 14:58:09 -06:00
|
|
|
mbox_increment = (unsigned int) (2097152.0 / (float)mca_btl_ugni_component.smsg_mbox_size);
|
2012-02-10 00:47:29 +00:00
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
/* we may end up using more */
|
2014-10-08 14:58:09 -06:00
|
|
|
if (nprocs/mbox_increment > (unsigned int) ugni_module->reg_max / 8) {
|
2013-11-18 04:58:37 +00:00
|
|
|
mbox_increment = nprocs / (ugni_module->reg_max >> 3);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
mbox_increment = mca_btl_ugni_component.mbox_increment;
|
2012-05-10 00:24:42 +00:00
|
|
|
}
|
|
|
|
|
2015-11-02 12:07:08 -07:00
|
|
|
/* use the MCA_RCACHE_FLAGS_RESV0 to signal this is smsg memory */
|
2015-02-19 13:41:41 -07:00
|
|
|
rc = opal_free_list_init (&ugni_module->smsg_mboxes,
|
|
|
|
sizeof (mca_btl_ugni_smsg_mbox_t), 8,
|
|
|
|
OBJ_CLASS(mca_btl_ugni_smsg_mbox_t),
|
|
|
|
mca_btl_ugni_component.smsg_mbox_size, 128,
|
2015-11-02 12:07:08 -07:00
|
|
|
32, -1, mbox_increment, ugni_module->super.btl_mpool,
|
|
|
|
MCA_RCACHE_FLAGS_SO_MEM | MCA_RCACHE_FLAGS_RESV0,
|
|
|
|
ugni_module->rcache, NULL, NULL);
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
2012-03-15 20:13:11 +00:00
|
|
|
BTL_ERROR(("error creating smsg mailbox free list"));
|
2012-02-10 00:47:29 +00:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2015-02-19 13:41:41 -07:00
|
|
|
rc = opal_free_list_init (&ugni_module->post_descriptors,
|
|
|
|
sizeof (mca_btl_ugni_post_descriptor_t),
|
|
|
|
8, OBJ_CLASS(mca_btl_ugni_post_descriptor_t),
|
|
|
|
0, 0, 0, -1, 256, NULL, 0, NULL, NULL, NULL);
|
2015-01-05 16:03:15 -07:00
|
|
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
|
|
|
BTL_ERROR(("error creating post descriptor free list"));
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_SUCCESS;
|
2012-02-10 00:47:29 +00:00
|
|
|
}
|
|
|
|
|
2012-04-19 21:51:44 +00:00
|
|
|
static void
|
|
|
|
mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_procs)
|
|
|
|
{
|
|
|
|
if (0 == mca_btl_ugni_component.max_mem_reg) {
|
|
|
|
#if defined(HAVE_GNI_GETJOBRESINFO)
|
|
|
|
gni_job_res_desc_t res_des;
|
|
|
|
gni_return_t grc;
|
2013-11-18 04:58:37 +00:00
|
|
|
int fuzz = 20;
|
2012-04-19 21:51:44 +00:00
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
grc = GNI_GetJobResInfo (ugni_module->device->dev_id, opal_common_ugni_module.ptag,
|
2012-04-19 21:51:44 +00:00
|
|
|
GNI_JOB_RES_MDD, &res_des);
|
|
|
|
if (GNI_RC_SUCCESS == grc) {
|
2013-11-18 04:58:37 +00:00
|
|
|
ugni_module->reg_max = (res_des.limit - fuzz) / nlocal_procs;
|
2012-04-19 21:51:44 +00:00
|
|
|
}
|
|
|
|
#else
|
|
|
|
/* no way to determine the maximum registration count */
|
|
|
|
ugni_module->reg_max = 1200 / nlocal_procs;
|
|
|
|
#endif
|
|
|
|
} else if (-1 == mca_btl_ugni_component.max_mem_reg) {
|
|
|
|
ugni_module->reg_max = INT_MAX;
|
|
|
|
} else {
|
|
|
|
ugni_module->reg_max = mca_btl_ugni_component.max_mem_reg;
|
|
|
|
}
|
|
|
|
|
|
|
|
ugni_module->reg_count = 0;
|
|
|
|
}
|
|
|
|
|
2013-11-18 04:58:37 +00:00
|
|
|
static int mca_btl_ugni_smsg_setup (int nprocs)
|
|
|
|
{
|
|
|
|
gni_smsg_attr_t tmp_smsg_attrib;
|
|
|
|
unsigned int mbox_size;
|
|
|
|
gni_return_t grc;
|
|
|
|
|
|
|
|
if (0 == mca_btl_ugni_component.ugni_smsg_limit) {
|
|
|
|
/* auto-set the smsg limit based on the number of ranks */
|
|
|
|
if (nprocs <= 512) {
|
|
|
|
mca_btl_ugni_component.ugni_smsg_limit = 8192;
|
|
|
|
} else if (nprocs <= 1024) {
|
|
|
|
mca_btl_ugni_component.ugni_smsg_limit = 2048;
|
|
|
|
} else if (nprocs <= 8192) {
|
|
|
|
mca_btl_ugni_component.ugni_smsg_limit = 1024;
|
|
|
|
} else if (nprocs <= 16384) {
|
|
|
|
mca_btl_ugni_component.ugni_smsg_limit = 512;
|
|
|
|
} else {
|
|
|
|
mca_btl_ugni_component.ugni_smsg_limit = 256;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
mca_btl_ugni_component.smsg_max_data = mca_btl_ugni_component.ugni_smsg_limit -
|
|
|
|
sizeof (mca_btl_ugni_send_frag_hdr_t);
|
|
|
|
|
|
|
|
if (mca_btl_ugni_component.ugni_smsg_limit == mca_btl_ugni_module.super.btl_eager_limit) {
|
|
|
|
mca_btl_ugni_module.super.btl_eager_limit = mca_btl_ugni_component.smsg_max_data;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* calculate mailbox size */
|
|
|
|
tmp_smsg_attrib.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
|
|
|
|
tmp_smsg_attrib.msg_maxsize = mca_btl_ugni_component.ugni_smsg_limit;
|
|
|
|
tmp_smsg_attrib.mbox_maxcredit = mca_btl_ugni_component.smsg_max_credits;
|
|
|
|
|
|
|
|
grc = GNI_SmsgBufferSizeNeeded (&tmp_smsg_attrib, &mbox_size);
|
|
|
|
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
|
|
|
|
BTL_ERROR(("error in GNI_SmsgBufferSizeNeeded"));
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return opal_common_rc_ugni_to_opal (grc);
|
2013-11-18 04:58:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
mca_btl_ugni_component.smsg_mbox_size = OPAL_ALIGN(mbox_size, 64, unsigned int);
|
|
|
|
|
George did the work and deserves all the credit for it. Ralph did the merge, and deserves whatever blame results from errors in it :-)
WHAT: Open our low-level communication infrastructure by moving all necessary components (btl/rcache/allocator/mpool) down in OPAL
All the components required for inter-process communications are currently deeply integrated in the OMPI layer. Several groups/institutions have express interest in having a more generic communication infrastructure, without all the OMPI layer dependencies. This communication layer should be made available at a different software level, available to all layers in the Open MPI software stack. As an example, our ORTE layer could replace the current OOB and instead use the BTL directly, gaining access to more reactive network interfaces than TCP. Similarly, external software libraries could take advantage of our highly optimized AM (active message) communication layer for their own purpose. UTK with support from Sandia, developped a version of Open MPI where the entire communication infrastucture has been moved down to OPAL (btl/rcache/allocator/mpool). Most of the moved components have been updated to match the new schema, with few exceptions (mainly BTLs where I have no way of compiling/testing them). Thus, the completion of this RFC is tied to being able to completing this move for all BTLs. For this we need help from the rest of the Open MPI community, especially those supporting some of the BTLs. A non-exhaustive list of BTLs that qualify here is: mx, portals4, scif, udapl, ugni, usnic.
This commit was SVN r32317.
2014-07-26 00:47:28 +00:00
|
|
|
return OPAL_SUCCESS;
|
2013-11-18 04:58:37 +00:00
|
|
|
}
|