Add support for the udreg registration cache and dynamics on XE/XK/XC.
To support the new mpool, two changes were made to the mpool infrastructure: 1) Added an mpool flag to indicate that an mpool does not need the memory hooks to use the leave-pinned protocols. This flag is checked in the mpool lookup. 2) Added an mpool context to the base registration. This new member is used by the udreg mpool to store the udreg context associated with a particular registration. The new member will not break ABI compatibility because it is currently used only by the udreg mpool. Dynamics support for Cray systems makes use of the global rank provided by orte to give the ugni library a unique rank for each process. Dynamics support is not available under direct launch (srun). cmr=v1.7.4 This commit was SVN r29719.
Этот коммит содержится в:
родитель
5206e877be
Коммит
24a7e7aa34
@ -14,6 +14,9 @@ with_pmi=/opt/cray/pmi/2.1.4-1.0000.8596.8.9.gem
|
||||
with_ugni=/opt/cray/ugni/2.3-1.0400.4127.5.20.gem
|
||||
with_ugni_includedir=/opt/cray/gni-headers/default/include
|
||||
|
||||
# enable the udreg mpool
|
||||
with_udreg=/opt/cray/udreg/2.3.2-1.0401.5929.3.3.gem
|
||||
|
||||
if test -e /opt/cray/alps/default ; then
|
||||
# use default location for CLE4
|
||||
with_alps=yes
|
||||
@ -29,5 +32,5 @@ with_tm=no
|
||||
with_slurm=no
|
||||
|
||||
# rpath alps, pmi, uGNI, and xpmem libraries
|
||||
LDFLAGS="$LDFLAGS -Wl,-rpath=/usr/lib64/alps -Wl,-rpath=$with_pmi/lib64 -Wl,-rpath=$with_ugni/lib64 -Wl,-rpath=$with_xpmem/lib64"
|
||||
LDFLAGS="$LDFLAGS -Wl,-rpath=/usr/lib64/alps -Wl,-rpath=$with_pmi/lib64 -Wl,-rpath=$with_ugni/lib64 -Wl,-rpath=$with_xpmem/lib64 -Wl,-rpath=$with_udreg/lib64"
|
||||
with_wrapper_ldflags="-Wl,-rpath=/usr/lib64/alps -Wl,-rpath=$with_pmi/lib64 -Wl,-rpath=$with_ugni/lib64 -Wl,-rpath=$with_xpmem/lib64"
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -23,7 +23,7 @@
|
||||
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/grdma/mpool_grdma.h"
|
||||
#include "ompi/mca/mpool/udreg/mpool_udreg.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal_stdint.h"
|
||||
|
||||
@ -42,17 +42,32 @@
|
||||
#include <gni_pub.h>
|
||||
|
||||
/* datagram message ids */
|
||||
#define MCA_BTL_UGNI_CONNECT_WILDCARD_ID 0x6b69726b00000000ull
|
||||
#define MCA_BTL_UGNI_CONNECT_DIRECTED_ID 0x6b61686e00000000ull
|
||||
#define MCA_BTL_UGNI_DATAGRAM_MASK 0xffffffff00000000ull
|
||||
#define MCA_BTL_UGNI_CONNECT_WILDCARD_ID 0x0000000000000000ull
|
||||
#define MCA_BTL_UGNI_CONNECT_DIRECTED_ID 0x8000000000000000ull
|
||||
#define MCA_BTL_UGNI_DATAGRAM_MASK 0x8000000000000000ull
|
||||
|
||||
/* ompi and smsg endpoint attributes */
|
||||
typedef struct mca_btl_ugni_endpoint_attr_t {
|
||||
uint64_t proc_id;
|
||||
uint32_t index;
|
||||
gni_smsg_attr_t smsg_attr;
|
||||
} mca_btl_ugni_endpoint_attr_t;
|
||||
|
||||
enum {
|
||||
MCA_BTL_UGNI_MPOOL_UDREG,
|
||||
MCA_BTL_UGNI_MPOOL_GRDMA
|
||||
};
|
||||
|
||||
typedef struct mca_btl_ugni_module_t {
|
||||
mca_btl_base_module_t super;
|
||||
|
||||
bool initialized;
|
||||
|
||||
ompi_common_ugni_device_t *device;
|
||||
|
||||
size_t endpoint_count;
|
||||
struct mca_btl_base_endpoint_t **endpoints;
|
||||
opal_pointer_array_t endpoints;
|
||||
opal_hash_table_t id_to_endpoint;
|
||||
|
||||
opal_list_t failed_frags;
|
||||
|
||||
@ -60,7 +75,7 @@ typedef struct mca_btl_ugni_module_t {
|
||||
ompi_free_list_t smsg_mboxes;
|
||||
|
||||
gni_ep_handle_t wildcard_ep;
|
||||
gni_smsg_attr_t wc_remote_attr, wc_local_attr;
|
||||
struct mca_btl_ugni_endpoint_attr_t wc_remote_attr, wc_local_attr;
|
||||
|
||||
gni_cq_handle_t rdma_local_cq;
|
||||
gni_cq_handle_t smsg_remote_cq;
|
||||
@ -85,6 +100,12 @@ typedef struct mca_btl_ugni_module_t {
|
||||
|
||||
uint32_t reg_max;
|
||||
uint32_t reg_count;
|
||||
|
||||
/* used to calculate the fraction of registered memory resources
|
||||
* this rank should be limited to */
|
||||
int nlocal_procs;
|
||||
|
||||
int active_send_count;
|
||||
} mca_btl_ugni_module_t;
|
||||
|
||||
typedef struct mca_btl_ugni_component_t {
|
||||
@ -132,6 +153,15 @@ typedef struct mca_btl_ugni_component_t {
|
||||
|
||||
/* Maximum number of memory registrations per process */
|
||||
int max_mem_reg;
|
||||
|
||||
/* Page size to use for SMSG allocations (udreg mpool) */
|
||||
unsigned int smsg_page_size;
|
||||
|
||||
/* mpool type (grdma or udreg) */
|
||||
int mpool_type;
|
||||
|
||||
/* Number of mailboxes to allocate in each block */
|
||||
unsigned int mbox_increment;
|
||||
} mca_btl_ugni_component_t;
|
||||
|
||||
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
|
||||
@ -259,4 +289,10 @@ typedef struct mca_btl_ugni_reg_t {
|
||||
OMPI_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component;
|
||||
OMPI_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module;
|
||||
|
||||
/* Get a unique 64-bit id for the process name.
 *
 * The id is built from the low 31 bits of name.jobid shifted left by 32,
 * OR-ed with name.vpid. Because the top bit of the jobid is masked off,
 * two jobids that differ only in that bit map to the same id.
 * NOTE(review): bit 63 of the result stays clear only if vpid is at most
 * 32 bits wide -- confirm against ompi_process_name_t. */
|
||||
static inline uint64_t mca_btl_ugni_proc_name_to_id (ompi_process_name_t name) {
|
||||
/* Throw away the top bit of the jobid for the datagram type */
|
||||
return ((uint64_t) (name.jobid & 0x7fffffff) << 32 | (uint64_t) name.vpid);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -16,10 +16,15 @@
|
||||
#include "btl_ugni_frag.h"
|
||||
#include "btl_ugni_smsg.h"
|
||||
|
||||
#include "opal/include/opal/align.h"
|
||||
#include "ompi/attribute/attribute.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
|
||||
static int
|
||||
mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module);
|
||||
static void
|
||||
mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_procs);
|
||||
static int mca_btl_ugni_smsg_setup (int nprocs);
|
||||
|
||||
int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs,
|
||||
@ -27,46 +32,63 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t **peers,
|
||||
opal_bitmap_t *reachable) {
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
size_t ntotal_procs, nlocal_procs, i;
|
||||
bool first_time_init = (NULL == ugni_module->endpoints);
|
||||
ompi_proc_t *my_proc = ompi_proc_local ();
|
||||
size_t ntotal_procs, i;
|
||||
int rc;
|
||||
|
||||
if (NULL == ugni_module->endpoints) {
|
||||
if (false == ugni_module->initialized) {
|
||||
(void) ompi_proc_world (&ntotal_procs);
|
||||
|
||||
ugni_module->endpoints = calloc (ntotal_procs, sizeof (mca_btl_base_endpoint_t *));
|
||||
if (OPAL_UNLIKELY(NULL == ugni_module->endpoints)) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
rc = opal_pointer_array_init (&ugni_module->endpoints, ntotal_procs, 1 << 24, 512);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
BTL_ERROR(("error inializing the endpoint array. rc = %d", rc));
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/* NTH: might want to vary this size based off the universe size (if
|
||||
* one exists). the table is only used for connection lookup and
|
||||
* endpoint removal. */
|
||||
rc = opal_hash_table_init (&ugni_module->id_to_endpoint, 512);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
BTL_ERROR(("error initializing the endpoint hash. rc = %d", rc));
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0, nlocal_procs = 0 ; i < nprocs ; ++i) {
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
struct ompi_proc_t *ompi_proc = procs[i];
|
||||
uint32_t rem_rank = ompi_proc->proc_name.vpid;
|
||||
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(ompi_proc->proc_name);
|
||||
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(ompi_proc->proc_flags)) {
|
||||
/* don't use uGNI to communicate with local procs */
|
||||
nlocal_procs++;
|
||||
ugni_module->nlocal_procs++;
|
||||
|
||||
/* Do not use uGNI to communicate with local procs unless we are adding more ranks.
|
||||
* Change this when sm and vader are updated to handle additional add procs. */
|
||||
if (!ugni_module->initialized || my_proc == ompi_proc) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* Create and Init endpoints */
|
||||
rc = mca_btl_ugni_init_ep (peers + i, (mca_btl_ugni_module_t *) btl, ompi_proc);
|
||||
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, ompi_proc);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
BTL_ERROR(("btl/ugni error initializing endpoint"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Record this endpoint in the id-to-endpoint hash table. */
|
||||
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, peers[i]));
|
||||
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]);
|
||||
|
||||
/* Set the reachable bit */
|
||||
rc = opal_bitmap_set_bit (reachable, i);
|
||||
|
||||
/* Store a reference to this peer */
|
||||
ugni_module->endpoints[rem_rank] = peers[i];
|
||||
++ugni_module->endpoint_count;
|
||||
}
|
||||
|
||||
if (first_time_init) {
|
||||
mca_btl_ugni_module_set_max_reg (ugni_module, nlocal_procs);
|
||||
mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs);
|
||||
|
||||
if (false == ugni_module->initialized) {
|
||||
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
|
||||
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->rdma_local_cq);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
@ -99,9 +121,9 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
|
||||
BTL_ERROR(("btl/ugni error initializing SMSG"));
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
ugni_module->endpoint_count += nprocs;
|
||||
ugni_module->initialized = true;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -111,26 +133,33 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t **peers) {
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
size_t i;
|
||||
int rc;
|
||||
|
||||
/* NTH: this function destroys the endpoint list which will cause bad
|
||||
things to happen if the caller only wants to delete a few procs. */
|
||||
while (ugni_module->active_send_count) {
|
||||
/* ensure all sends are complete before removing any procs */
|
||||
rc = mca_btl_ugni_progress_local_smsg (ugni_module);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
struct ompi_proc_t *ompi_proc = procs[i];
|
||||
uint32_t rem_rank = ompi_proc->proc_name.vpid;
|
||||
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(ompi_proc->proc_name);
|
||||
mca_btl_base_endpoint_t *ep = NULL;
|
||||
|
||||
if (ugni_module->endpoints[rem_rank]) {
|
||||
mca_btl_ugni_release_ep (ugni_module->endpoints[rem_rank]);
|
||||
/* lookup this proc in the hash table */
|
||||
(void) opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
|
||||
|
||||
BTL_VERBOSE(("deleting endpoint with proc id 0x%" PRIx64 ", ptr: %p", proc_id, ep));
|
||||
|
||||
if (NULL != ep) {
|
||||
mca_btl_ugni_release_ep (ep);
|
||||
--ugni_module->endpoint_count;
|
||||
}
|
||||
|
||||
ugni_module->endpoints[rem_rank] = NULL;
|
||||
}
|
||||
|
||||
ugni_module->endpoint_count -= nprocs;
|
||||
|
||||
if (0 == ugni_module->endpoint_count) {
|
||||
free (ugni_module->endpoints);
|
||||
ugni_module->endpoints = NULL;
|
||||
/* remove the endpoint from the hash table */
|
||||
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, NULL);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -194,8 +223,9 @@ static int
|
||||
mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
||||
{
|
||||
struct mca_mpool_base_resources_t mpool_resources;
|
||||
int mbox_increment, rc;
|
||||
size_t nprocs;
|
||||
unsigned int mbox_increment, nprocs;
|
||||
const char *mpool_name;
|
||||
int rc;
|
||||
|
||||
rc = opal_pointer_array_init (&ugni_module->pending_smsg_frags_bb, 0,
|
||||
1 << 30, 32768);
|
||||
@ -203,7 +233,14 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
||||
return rc;
|
||||
}
|
||||
|
||||
(void) ompi_proc_world (&nprocs);
|
||||
/* determine how many procs are in the job (might want to check universe size here) */
|
||||
nprocs = ompi_comm_size ((ompi_communicator_t *) MPI_COMM_WORLD);
|
||||
|
||||
rc = mca_btl_ugni_smsg_setup (nprocs);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
BTL_ERROR(("error setting up smsg"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = ompi_free_list_init_ex_new (&ugni_module->smsg_frags,
|
||||
sizeof (mca_btl_ugni_smsg_frag_t),
|
||||
@ -243,24 +280,41 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
||||
return rc;
|
||||
}
|
||||
|
||||
mpool_resources.pool_name = "ugni";
|
||||
mpool_resources.pool_name = "ompi.ugni";
|
||||
mpool_resources.reg_data = (void *) ugni_module;
|
||||
mpool_resources.sizeof_reg = sizeof (mca_btl_ugni_reg_t);
|
||||
mpool_resources.register_mem = ugni_reg_rdma_mem;
|
||||
mpool_resources.deregister_mem = ugni_dereg_mem;
|
||||
|
||||
if (MCA_BTL_UGNI_MPOOL_UDREG == mca_btl_ugni_component.mpool_type) {
|
||||
/* additional settings for the udreg mpool */
|
||||
/* 4k should be large enough for any Gemini/Aries system */
|
||||
mpool_resources.max_entries = 4096;
|
||||
mpool_resources.use_kernel_cache = true;
|
||||
|
||||
/* request a specific page size. this request may not be honored if the
|
||||
* page size does not exist. */
|
||||
mpool_resources.page_size = mca_btl_ugni_component.smsg_page_size;
|
||||
|
||||
mpool_resources.use_evict_w_unreg = false;
|
||||
mpool_name = "udreg";
|
||||
} else {
|
||||
mpool_name = "grdma";
|
||||
}
|
||||
|
||||
ugni_module->super.btl_mpool =
|
||||
mca_mpool_base_module_create("grdma", ugni_module->device,
|
||||
&mpool_resources);
|
||||
mca_mpool_base_module_create(mpool_name, ugni_module->device, &mpool_resources);
|
||||
|
||||
mpool_resources.register_mem = ugni_reg_smsg_mem;
|
||||
|
||||
ugni_module->smsg_mpool =
|
||||
mca_mpool_base_module_create(mpool_name, ugni_module->device, &mpool_resources);
|
||||
|
||||
if (NULL == ugni_module->super.btl_mpool) {
|
||||
BTL_ERROR(("error creating rdma mpool"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
mpool_resources.register_mem = ugni_reg_smsg_mem;
|
||||
|
||||
ugni_module->smsg_mpool =
|
||||
mca_mpool_base_module_create("grdma", ugni_module->device,
|
||||
&mpool_resources);
|
||||
if (NULL == ugni_module->smsg_mpool) {
|
||||
BTL_ERROR(("error creating smsg mpool"));
|
||||
return OMPI_ERROR;
|
||||
@ -298,23 +352,24 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
||||
|
||||
OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, ompi_free_list_t);
|
||||
|
||||
mbox_increment = nprocs;
|
||||
|
||||
if (0 == mca_btl_ugni_component.mbox_increment) {
|
||||
/* limit mailbox allocations to either 12.5% of available registrations
|
||||
or 2MiB per allocation */
|
||||
if (nprocs * mca_btl_ugni_component.smsg_mbox_size > 2097152) {
|
||||
mbox_increment = (int) (2097152.0 / (float)mca_btl_ugni_component.smsg_mbox_size);
|
||||
}
|
||||
|
||||
/* we may end up using more */
|
||||
if (nprocs/mbox_increment > ugni_module->reg_max / 8) {
|
||||
mbox_increment = nprocs / (ugni_module->reg_max >> 3);
|
||||
}
|
||||
} else {
|
||||
mbox_increment = mca_btl_ugni_component.mbox_increment;
|
||||
}
|
||||
|
||||
rc = ompi_free_list_init_new (&ugni_module->smsg_mboxes,
|
||||
sizeof (mca_btl_ugni_smsg_mbox_t), 8,
|
||||
OBJ_CLASS(mca_btl_ugni_smsg_mbox_t),
|
||||
mca_btl_ugni_component.smsg_mbox_size, 128,
|
||||
32, nprocs, mbox_increment,
|
||||
32, -1, mbox_increment,
|
||||
ugni_module->smsg_mpool);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
BTL_ERROR(("error creating smsg mailbox free list"));
|
||||
@ -331,11 +386,12 @@ mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_
|
||||
#if defined(HAVE_GNI_GETJOBRESINFO)
|
||||
gni_job_res_desc_t res_des;
|
||||
gni_return_t grc;
|
||||
int fuzz = 20;
|
||||
|
||||
grc = GNI_GetJobResInfo (ugni_module->device->dev_id, ompi_common_ugni_module.ptag,
|
||||
GNI_JOB_RES_MDD, &res_des);
|
||||
if (GNI_RC_SUCCESS == grc) {
|
||||
ugni_module->reg_max = (res_des.limit - res_des.used) / nlocal_procs;
|
||||
ugni_module->reg_max = (res_des.limit - fuzz) / nlocal_procs;
|
||||
}
|
||||
#else
|
||||
/* no way to determine the maximum registration count */
|
||||
@ -350,3 +406,46 @@ mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_
|
||||
ugni_module->reg_count = 0;
|
||||
}
|
||||
|
||||
/* Configure the component-wide SMSG parameters.
 *
 * Picks an SMSG message size limit when none was set (ugni_smsg_limit == 0),
 * derives the maximum SMSG payload and the mailbox size from it, and stores
 * the results in mca_btl_ugni_component.
 *
 * nprocs: number of ranks used to choose the automatic SMSG limit.
 *
 * Returns OMPI_SUCCESS, or the uGNI error from GNI_SmsgBufferSizeNeeded
 * converted with ompi_common_rc_ugni_to_ompi. */
static int mca_btl_ugni_smsg_setup (int nprocs)
|
||||
{
|
||||
gni_smsg_attr_t tmp_smsg_attrib;
|
||||
unsigned int mbox_size;
|
||||
gni_return_t grc;
|
||||
|
||||
if (0 == mca_btl_ugni_component.ugni_smsg_limit) {
|
||||
/* auto-set the smsg limit based on the number of ranks */
|
||||
/* the limit shrinks as the rank count grows */
if (nprocs <= 512) {
|
||||
mca_btl_ugni_component.ugni_smsg_limit = 8192;
|
||||
} else if (nprocs <= 1024) {
|
||||
mca_btl_ugni_component.ugni_smsg_limit = 2048;
|
||||
} else if (nprocs <= 8192) {
|
||||
mca_btl_ugni_component.ugni_smsg_limit = 1024;
|
||||
} else if (nprocs <= 16384) {
|
||||
mca_btl_ugni_component.ugni_smsg_limit = 512;
|
||||
} else {
|
||||
mca_btl_ugni_component.ugni_smsg_limit = 256;
|
||||
}
|
||||
}
|
||||
|
||||
/* payload available per message is the limit minus the send fragment header */
mca_btl_ugni_component.smsg_max_data = mca_btl_ugni_component.ugni_smsg_limit -
|
||||
sizeof (mca_btl_ugni_send_frag_hdr_t);
|
||||
|
||||
/* when the eager limit equals the smsg limit, lower it to the payload size */
if (mca_btl_ugni_component.ugni_smsg_limit == mca_btl_ugni_module.super.btl_eager_limit) {
|
||||
mca_btl_ugni_module.super.btl_eager_limit = mca_btl_ugni_component.smsg_max_data;
|
||||
}
|
||||
|
||||
/* calculate mailbox size */
|
||||
/* NOTE(review): only these three fields of tmp_smsg_attrib are set before the
 * call; presumably GNI_SmsgBufferSizeNeeded reads no others -- confirm */
tmp_smsg_attrib.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
|
||||
tmp_smsg_attrib.msg_maxsize = mca_btl_ugni_component.ugni_smsg_limit;
|
||||
tmp_smsg_attrib.mbox_maxcredit = mca_btl_ugni_component.smsg_max_credits;
|
||||
|
||||
grc = GNI_SmsgBufferSizeNeeded (&tmp_smsg_attrib, &mbox_size);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
BTL_ERROR(("error in GNI_SmsgBufferSizeNeeded"));
|
||||
return ompi_common_rc_ugni_to_ompi (grc);
|
||||
}
|
||||
|
||||
/* presumably rounds the mailbox size up to a 64-byte multiple -- see OPAL_ALIGN */
mca_btl_ugni_component.smsg_mbox_size = OPAL_ALIGN(mbox_size, 64, unsigned int);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -15,10 +15,12 @@
|
||||
#include "btl_ugni_rdma.h"
|
||||
#include "btl_ugni_smsg.h"
|
||||
|
||||
#include "opal/include/opal/align.h"
|
||||
#include "opal/memoryhooks/memory.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
|
||||
#include "ompi/attribute/attribute.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
|
||||
static int btl_ugni_component_register(void);
|
||||
static int btl_ugni_component_open(void);
|
||||
static int btl_ugni_component_close(void);
|
||||
@ -26,33 +28,40 @@ static mca_btl_base_module_t **mca_btl_ugni_component_init(int *, bool, bool);
|
||||
static int mca_btl_ugni_component_progress(void);
|
||||
|
||||
mca_btl_ugni_component_t mca_btl_ugni_component = {
|
||||
{
|
||||
.super = {
|
||||
/* First, the mca_base_component_t struct containing meta information
|
||||
about the component itself */
|
||||
|
||||
{
|
||||
.btl_version = {
|
||||
MCA_BTL_BASE_VERSION_2_0_0,
|
||||
|
||||
"ugni", /* MCA component name */
|
||||
OMPI_MAJOR_VERSION, /* MCA component major version */
|
||||
OMPI_MINOR_VERSION, /* MCA component minor version */
|
||||
OMPI_RELEASE_VERSION, /* MCA component release version */
|
||||
btl_ugni_component_open, /* component open */
|
||||
btl_ugni_component_close, /* component close */
|
||||
NULL, /* component query */
|
||||
btl_ugni_component_register, /* component register */
|
||||
.mca_component_name = "ugni",
|
||||
.mca_component_major_version = OMPI_MAJOR_VERSION,
|
||||
.mca_component_minor_version = OMPI_MINOR_VERSION,
|
||||
.mca_component_release_version = OMPI_RELEASE_VERSION,
|
||||
.mca_open_component = btl_ugni_component_open,
|
||||
.mca_close_component = btl_ugni_component_close,
|
||||
.mca_register_component_params = btl_ugni_component_register,
|
||||
},
|
||||
{
|
||||
.btl_data = {
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
mca_btl_ugni_component_init,
|
||||
mca_btl_ugni_component_progress,
|
||||
.btl_init = mca_btl_ugni_component_init,
|
||||
.btl_progress = mca_btl_ugni_component_progress,
|
||||
}
|
||||
};
|
||||
|
||||
mca_base_var_enum_value_t mpool_values[] = {
|
||||
{MCA_BTL_UGNI_MPOOL_UDREG, "udreg"},
|
||||
{MCA_BTL_UGNI_MPOOL_GRDMA, "grdma"},
|
||||
{-1, NULL} /* sentinal */
|
||||
};
|
||||
|
||||
static int
|
||||
btl_ugni_component_register(void)
|
||||
{
|
||||
mca_base_var_enum_t *new_enum;
|
||||
int rc;
|
||||
|
||||
(void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"Gemini byte transport layer");
|
||||
|
||||
@ -60,44 +69,38 @@ btl_ugni_component_register(void)
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"free_list_num", NULL, MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.ugni_free_list_num);
|
||||
mca_btl_ugni_component.ugni_free_list_max = 16384;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"free_list_max", NULL, MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.ugni_free_list_max);
|
||||
mca_btl_ugni_component.ugni_free_list_inc = 64;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.ugni_free_list_inc);
|
||||
|
||||
mca_btl_ugni_component.ugni_eager_num = 16;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"eager_num", NULL, MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.ugni_eager_num);
|
||||
mca_btl_ugni_component.ugni_eager_max = 128;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"eager_max", NULL, MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.ugni_eager_max);
|
||||
mca_btl_ugni_component.ugni_eager_inc = 16;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"eager_inc", NULL, MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.ugni_eager_inc);
|
||||
|
||||
mca_btl_ugni_component.remote_cq_size = 40000;
|
||||
@ -105,16 +108,14 @@ btl_ugni_component_register(void)
|
||||
"remote_cq_size", "Remote SMSG completion queue "
|
||||
"size (default 40000)", MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.remote_cq_size);
|
||||
mca_btl_ugni_component.local_cq_size = 8192;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"local_cq_size", "Local completion queue size "
|
||||
"(default 8192)", MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.local_cq_size);
|
||||
|
||||
mca_btl_ugni_component.ugni_smsg_limit = 0;
|
||||
@ -124,8 +125,7 @@ btl_ugni_component_register(void)
|
||||
"(0 - autoselect(default), 16k max)",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0,
|
||||
MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.ugni_smsg_limit);
|
||||
|
||||
mca_btl_ugni_component.smsg_max_credits = 32;
|
||||
@ -134,8 +134,7 @@ btl_ugni_component_register(void)
|
||||
"outstanding SMSG/MSGQ message (default 32)",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0,
|
||||
MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.smsg_max_credits);
|
||||
|
||||
mca_btl_ugni_component.ugni_fma_limit = 1024;
|
||||
@ -145,8 +144,7 @@ btl_ugni_component_register(void)
|
||||
"Access) protocol (default 1024, 64k max)",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0,
|
||||
MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.ugni_fma_limit);
|
||||
|
||||
mca_btl_ugni_component.ugni_get_limit = 1 * 1024 * 1024;
|
||||
@ -155,24 +153,21 @@ btl_ugni_component_register(void)
|
||||
"will be sent using a get protocol "
|
||||
"(default 1M)", MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.ugni_get_limit);
|
||||
|
||||
mca_btl_ugni_component.rdma_max_retries = 16;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.rdma_max_retries);
|
||||
|
||||
mca_btl_ugni_component.smsg_max_retries = 16;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"smsg_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.smsg_max_retries);
|
||||
|
||||
mca_btl_ugni_component.max_mem_reg = 0;
|
||||
@ -182,10 +177,40 @@ btl_ugni_component_register(void)
|
||||
"hold (0 - autoselect, -1 - unlimited)"
|
||||
" (default 0)", MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.max_mem_reg);
|
||||
|
||||
mca_btl_ugni_component.mbox_increment = 0;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"mbox_inc", "Number of SMSG mailboxes to "
|
||||
"allocate in each block (0 - autoselect(default))",
|
||||
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
|
||||
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment);
|
||||
|
||||
mca_btl_ugni_component.smsg_page_size = 2 << 20;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"smsg_page_size", "Page size to use for SMSG "
|
||||
"mailbox allocation (default 2M)",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0,
|
||||
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.smsg_page_size);
|
||||
|
||||
/* btl/ugni can only support a fixed set of mpools (these mpools have compatible resource
|
||||
* structures) */
|
||||
rc = mca_base_var_enum_create ("btl_ugni_mpool", mpool_values, &new_enum);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
mca_btl_ugni_component.mpool_type = MCA_BTL_UGNI_MPOOL_UDREG;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"mpool", "mpool to use", MCA_BASE_VAR_TYPE_INT, new_enum,
|
||||
0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_type);
|
||||
OBJ_RELEASE(new_enum);
|
||||
|
||||
mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
|
||||
|
||||
/* smsg threshold */
|
||||
@ -234,6 +259,7 @@ btl_ugni_component_close(void)
|
||||
}
|
||||
|
||||
static void mca_btl_ugni_autoset_leave_pinned (void) {
|
||||
if (MCA_BTL_UGNI_MPOOL_UDREG != mca_btl_ugni_component.mpool_type) {
|
||||
int value = opal_mem_hooks_support_level();
|
||||
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
|
||||
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
|
||||
@ -245,48 +271,11 @@ static void mca_btl_ugni_autoset_leave_pinned (void) {
|
||||
ompi_mpi_leave_pinned = 0;
|
||||
ompi_mpi_leave_pinned_pipeline = 0;
|
||||
}
|
||||
} else if (-1 == ompi_mpi_leave_pinned) {
|
||||
/* if udreg is in use we can set leave pinned without checking for the
|
||||
* memory hooks. */
|
||||
ompi_mpi_leave_pinned = !ompi_mpi_leave_pinned_pipeline;
|
||||
}
|
||||
|
||||
static int mca_btl_ugni_smsg_setup (void) {
|
||||
gni_smsg_attr_t tmp_smsg_attrib;
|
||||
unsigned int mbox_size;
|
||||
size_t nprocs;
|
||||
gni_return_t rc;
|
||||
|
||||
(void) ompi_proc_world (&nprocs);
|
||||
|
||||
if (0 == mca_btl_ugni_component.ugni_smsg_limit) {
|
||||
/* auto-set the smsg limit based on the number of ranks */
|
||||
if (nprocs <= 512) {
|
||||
mca_btl_ugni_component.ugni_smsg_limit = 8192;
|
||||
} else if (nprocs <= 1024) {
|
||||
mca_btl_ugni_component.ugni_smsg_limit = 2048;
|
||||
} else if (nprocs <= 8192) {
|
||||
mca_btl_ugni_component.ugni_smsg_limit = 1024;
|
||||
} else if (nprocs <= 16384) {
|
||||
mca_btl_ugni_component.ugni_smsg_limit = 512;
|
||||
} else {
|
||||
mca_btl_ugni_component.ugni_smsg_limit = 256;
|
||||
}
|
||||
}
|
||||
|
||||
mca_btl_ugni_component.smsg_max_data = mca_btl_ugni_component.ugni_smsg_limit -
|
||||
sizeof (mca_btl_ugni_send_frag_hdr_t);
|
||||
|
||||
/* calculate mailbox size */
|
||||
tmp_smsg_attrib.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
|
||||
tmp_smsg_attrib.msg_maxsize = mca_btl_ugni_component.ugni_smsg_limit;
|
||||
tmp_smsg_attrib.mbox_maxcredit = mca_btl_ugni_component.smsg_max_credits;
|
||||
|
||||
rc = GNI_SmsgBufferSizeNeeded (&tmp_smsg_attrib, &mbox_size);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
BTL_ERROR(("error in GNI_SmsgBufferSizeNeeded"));
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
||||
mca_btl_ugni_component.smsg_mbox_size = OPAL_ALIGN(mbox_size, 64, unsigned int);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static mca_btl_base_module_t **
|
||||
@ -337,15 +326,6 @@ mca_btl_ugni_component_init (int *num_btl_modules,
|
||||
|
||||
mca_btl_ugni_autoset_leave_pinned ();
|
||||
|
||||
rc = mca_btl_ugni_smsg_setup ();
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (mca_btl_ugni_component.ugni_smsg_limit == mca_btl_ugni_module.super.btl_eager_limit) {
|
||||
mca_btl_ugni_module.super.btl_eager_limit = mca_btl_ugni_component.smsg_max_data;
|
||||
}
|
||||
|
||||
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit;
|
||||
|
||||
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
|
||||
@ -364,7 +344,7 @@ mca_btl_ugni_component_init (int *num_btl_modules,
|
||||
|
||||
*num_btl_modules = mca_btl_ugni_component.ugni_num_btls;
|
||||
|
||||
BTL_VERBOSE(("btl/ugni done initializing modules"));
|
||||
BTL_VERBOSE(("btl/ugni done initializing %d module(s)", *num_btl_modules));
|
||||
|
||||
return base_modules;
|
||||
}
|
||||
@ -373,12 +353,12 @@ static inline int
|
||||
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
|
||||
{
|
||||
uint32_t remote_addr, remote_id;
|
||||
uint64_t datagram_id, data;
|
||||
mca_btl_base_endpoint_t *ep;
|
||||
gni_post_state_t post_state;
|
||||
gni_ep_handle_t handle;
|
||||
uint64_t datagram_id;
|
||||
gni_return_t grc;
|
||||
int count = 0;
|
||||
int count = 0, rc;
|
||||
|
||||
/* check for datagram completion */
|
||||
grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
|
||||
@ -386,12 +366,15 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) ==
|
||||
MCA_BTL_UGNI_CONNECT_WILDCARD_ID) {
|
||||
handle = ugni_module->wildcard_ep;
|
||||
data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);
|
||||
|
||||
BTL_VERBOSE(("datgram_id: %" PRIx64 ", mask: %" PRIx64, datagram_id, (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));
|
||||
|
||||
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
|
||||
ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&ugni_module->endpoints, data);
|
||||
handle = ep->smsg_ep_handle;
|
||||
} else {
|
||||
handle =
|
||||
ugni_module->endpoints[(uint32_t)(datagram_id & 0xffffffffull)]->smsg_ep_handle;
|
||||
handle = ugni_module->wildcard_ep;
|
||||
}
|
||||
|
||||
/* wait for the incoming datagram to complete (in case it isn't) */
|
||||
@ -402,10 +385,28 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
|
||||
return ompi_common_rc_ugni_to_ompi (grc);
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
|
||||
"peer = %d", datagram_id, post_state, remote_id));
|
||||
/* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
|
||||
if (handle == ugni_module->wildcard_ep) {
|
||||
BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64, ugni_module->wc_remote_attr.proc_id));
|
||||
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint,
|
||||
ugni_module->wc_remote_attr.proc_id,
|
||||
(void *) &ep);
|
||||
/* check if the endpoint is known */
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
|
||||
BTL_ERROR(("received connection attempt from an unknown peer. rc: %d, ep: %p, id: 0x%" PRIx64,
|
||||
rc, ep, ugni_module->wc_remote_attr.proc_id));
|
||||
return OMPI_ERR_NOT_FOUND;
|
||||
}
|
||||
} else {
|
||||
BTL_VERBOSE(("directed datagram complete for endpoint %p", ep));
|
||||
}
|
||||
|
||||
ep = ugni_module->endpoints[remote_id];
|
||||
/* should not have gotten a NULL endpoint */
|
||||
assert (NULL != ep);
|
||||
|
||||
BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
|
||||
"data = 0x%" PRIx64 ", ep = %p, remote id: %d", datagram_id, post_state,
|
||||
data, ep, remote_id));
|
||||
|
||||
/* NTH: TODO -- error handling */
|
||||
(void) mca_btl_ugni_ep_connect_progress (ep);
|
||||
@ -416,8 +417,7 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
|
||||
}
|
||||
|
||||
/* repost the wildcard datagram */
|
||||
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) ==
|
||||
MCA_BTL_UGNI_CONNECT_WILDCARD_ID) {
|
||||
if (handle == ugni_module->wildcard_ep) {
|
||||
mca_btl_ugni_wildcard_ep_post (ugni_module);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011-2013 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -41,20 +41,20 @@ static inline int mca_btl_ugni_ep_smsg_get_mbox (mca_btl_base_endpoint_t *ep) {
|
||||
}
|
||||
|
||||
ep->mailbox = (mca_btl_ugni_smsg_mbox_t *) mbox;
|
||||
ep->mailbox->attr.index = ep->index;
|
||||
|
||||
/* per ugni spec we need to zero mailbox data before connecting */
|
||||
memset ((char *)ep->mailbox->smsg_attrib.msg_buffer + ep->mailbox->smsg_attrib.mbox_offset, 0,
|
||||
ep->mailbox->smsg_attrib.buff_size);
|
||||
memset ((char *)ep->mailbox->attr.smsg_attr.msg_buffer + ep->mailbox->attr.smsg_attr.mbox_offset, 0,
|
||||
ep->mailbox->attr.smsg_attr.buff_size);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect) {
|
||||
gni_return_t rc;
|
||||
|
||||
do {
|
||||
if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
|
||||
/* nothing to do */
|
||||
break;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
|
||||
@ -74,7 +74,6 @@ int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnec
|
||||
ep->mailbox = NULL;
|
||||
|
||||
ep->state = MCA_BTL_UGNI_EP_STATE_INIT;
|
||||
} while (0);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -89,8 +88,8 @@ static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("initiaiting connection to remote peer with address: %u id: %u",
|
||||
ep->common->ep_rem_addr, ep->common->ep_rem_id));
|
||||
BTL_VERBOSE(("initiaiting connection to remote peer with address: %u id: %u proc: %p",
|
||||
ep->common->ep_rem_addr, ep->common->ep_rem_id, ep->peer_proc));
|
||||
|
||||
/* bind endpoint to remote address */
|
||||
/* we bind two endpoints to seperate out local smsg completion and local fma completion */
|
||||
@ -112,7 +111,7 @@ static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
|
||||
|
||||
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTING;
|
||||
|
||||
memset (&ep->remote_smsg_attrib, 0, sizeof (ep->remote_smsg_attrib));
|
||||
memset (&ep->remote_attr, 0, sizeof (ep->remote_attr));
|
||||
|
||||
BTL_VERBOSE(("btl/ugni connection to remote peer initiated"));
|
||||
|
||||
@ -125,25 +124,32 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
|
||||
|
||||
BTL_VERBOSE(("finishing connection. remote attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
|
||||
"mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
|
||||
"msg_maxsize = %d", ep->remote_smsg_attrib.msg_type, ep->remote_smsg_attrib.msg_buffer,
|
||||
ep->remote_smsg_attrib.buff_size, ep->remote_smsg_attrib.mem_hndl.qword1,
|
||||
ep->remote_smsg_attrib.mem_hndl.qword2, ep->remote_smsg_attrib.mbox_offset,
|
||||
ep->remote_smsg_attrib.mbox_maxcredit, ep->remote_smsg_attrib.msg_maxsize));
|
||||
"msg_maxsize = %d", ep->remote_attr.smsg_attr.msg_type, ep->remote_attr.smsg_attr.msg_buffer,
|
||||
ep->remote_attr.smsg_attr.buff_size, ep->remote_attr.smsg_attr.mem_hndl.qword1,
|
||||
ep->remote_attr.smsg_attr.mem_hndl.qword2, ep->remote_attr.smsg_attr.mbox_offset,
|
||||
ep->remote_attr.smsg_attr.mbox_maxcredit, ep->remote_attr.smsg_attr.msg_maxsize));
|
||||
|
||||
BTL_VERBOSE(("finishing connection. local attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
|
||||
"mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
|
||||
"msg_maxsize = %d", ep->mailbox->smsg_attrib.msg_type, ep->mailbox->smsg_attrib.msg_buffer,
|
||||
ep->mailbox->smsg_attrib.buff_size, ep->mailbox->smsg_attrib.mem_hndl.qword1,
|
||||
ep->mailbox->smsg_attrib.mem_hndl.qword2, ep->mailbox->smsg_attrib.mbox_offset,
|
||||
ep->mailbox->smsg_attrib.mbox_maxcredit, ep->mailbox->smsg_attrib.msg_maxsize));
|
||||
"msg_maxsize = %d", ep->mailbox->attr.smsg_attr.msg_type, ep->mailbox->attr.smsg_attr.msg_buffer,
|
||||
ep->mailbox->attr.smsg_attr.buff_size, ep->mailbox->attr.smsg_attr.mem_hndl.qword1,
|
||||
ep->mailbox->attr.smsg_attr.mem_hndl.qword2, ep->mailbox->attr.smsg_attr.mbox_offset,
|
||||
ep->mailbox->attr.smsg_attr.mbox_maxcredit, ep->mailbox->attr.smsg_attr.msg_maxsize));
|
||||
|
||||
grc = GNI_SmsgInit (ep->smsg_ep_handle, &ep->mailbox->smsg_attrib, &ep->remote_smsg_attrib);
|
||||
grc = GNI_SmsgInit (ep->smsg_ep_handle, &ep->mailbox->attr.smsg_attr, &ep->remote_attr.smsg_attr);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
BTL_ERROR(("error initializing SMSG protocol. rc = %d", grc));
|
||||
|
||||
return ompi_common_rc_ugni_to_ompi (grc);
|
||||
}
|
||||
|
||||
/* set the local event data to the local index and the remote event data to my
|
||||
* index on the remote peer. This makes lookup of endpoints on completion take
|
||||
* a single lookup in the endpoints array. we will not be able to change the
|
||||
* remote peer's index in the endpoint's array after this point. */
|
||||
GNI_EpSetEventData (ep->rdma_ep_handle, ep->index, ep->remote_attr.index);
|
||||
GNI_EpSetEventData (ep->smsg_ep_handle, ep->index, ep->remote_attr.index);
|
||||
|
||||
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
|
||||
|
||||
/* send all pending messages */
|
||||
@ -161,9 +167,11 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
|
||||
static inline int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep) {
|
||||
gni_return_t rc;
|
||||
|
||||
rc = GNI_EpPostDataWId (ep->smsg_ep_handle, &ep->mailbox->smsg_attrib, sizeof (ep->mailbox->smsg_attrib),
|
||||
&ep->remote_smsg_attrib, sizeof (ep->remote_smsg_attrib),
|
||||
MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->common->ep_rem_id);
|
||||
BTL_VERBOSE(("posting directed datagram to remote id: %d for endpoint %p", ep->common->ep_rem_id, ep));
|
||||
|
||||
rc = GNI_EpPostDataWId (ep->smsg_ep_handle, &ep->mailbox->attr, sizeof (ep->mailbox->attr),
|
||||
&ep->remote_attr, sizeof (ep->remote_attr),
|
||||
MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index);
|
||||
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
@ -171,6 +179,8 @@ static inline int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep) {
|
||||
int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("progressing connection for endpoint %p with state %d", ep, ep->state));
|
||||
|
||||
if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -182,7 +192,7 @@ int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
|
||||
}
|
||||
}
|
||||
|
||||
if (GNI_SMSG_TYPE_INVALID == ep->remote_smsg_attrib.msg_type) {
|
||||
if (GNI_SMSG_TYPE_INVALID == ep->remote_attr.smsg_attr.msg_type) {
|
||||
/* use datagram to exchange connection information with the remote peer */
|
||||
rc = mca_btl_ugni_directed_ep_post (ep);
|
||||
if (OMPI_SUCCESS == rc) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -39,7 +39,7 @@ typedef struct mca_btl_base_endpoint_t {
|
||||
gni_ep_handle_t smsg_ep_handle;
|
||||
gni_ep_handle_t rdma_ep_handle;
|
||||
|
||||
gni_smsg_attr_t remote_smsg_attrib;
|
||||
mca_btl_ugni_endpoint_attr_t remote_attr;
|
||||
|
||||
struct mca_btl_ugni_smsg_mbox_t *mailbox;
|
||||
|
||||
@ -47,6 +47,8 @@ typedef struct mca_btl_base_endpoint_t {
|
||||
bool wait_listed;
|
||||
|
||||
int32_t smsg_progressing;
|
||||
|
||||
int index;
|
||||
} mca_btl_base_endpoint_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_base_endpoint_t);
|
||||
@ -54,11 +56,11 @@ OBJ_CLASS_DECLARATION(mca_btl_base_endpoint_t);
|
||||
int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep);
|
||||
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect);
|
||||
|
||||
static inline int mca_btl_ugni_init_ep (mca_btl_base_endpoint_t **ep,
|
||||
static inline int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module,
|
||||
mca_btl_base_endpoint_t **ep,
|
||||
mca_btl_ugni_module_t *btl,
|
||||
ompi_proc_t *peer_proc) {
|
||||
mca_btl_base_endpoint_t *endpoint;
|
||||
int rc;
|
||||
|
||||
endpoint = OBJ_NEW(mca_btl_base_endpoint_t);
|
||||
assert (endpoint != NULL);
|
||||
@ -69,6 +71,7 @@ static inline int mca_btl_ugni_init_ep (mca_btl_base_endpoint_t **ep,
|
||||
endpoint->btl = btl;
|
||||
endpoint->peer_proc = peer_proc;
|
||||
endpoint->common = NULL;
|
||||
endpoint->index = opal_pointer_array_add (&ugni_module->endpoints, endpoint);
|
||||
|
||||
*ep = endpoint;
|
||||
|
||||
@ -79,11 +82,18 @@ static inline void mca_btl_ugni_release_ep (mca_btl_base_endpoint_t *ep) {
|
||||
int rc;
|
||||
|
||||
if (ep->common) {
|
||||
opal_mutex_lock (&ep->lock);
|
||||
|
||||
rc = mca_btl_ugni_ep_disconnect (ep, false);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
BTL_VERBOSE(("btl/ugni error disconnecting endpoint"));
|
||||
}
|
||||
|
||||
/* TODO -- Clear space at the end of the endpoint array */
|
||||
opal_pointer_array_set_item (&ep->btl->endpoints, ep->index, NULL);
|
||||
|
||||
opal_mutex_unlock (&ep->lock);
|
||||
|
||||
ompi_common_ugni_endpoint_return (ep->common);
|
||||
}
|
||||
|
||||
@ -97,7 +107,7 @@ static inline int mca_btl_ugni_check_endpoint_state (mca_btl_base_endpoint_t *ep
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ep->lock);
|
||||
opal_mutex_lock (&ep->lock);
|
||||
|
||||
switch (ep->state) {
|
||||
case MCA_BTL_UGNI_EP_STATE_INIT:
|
||||
@ -112,7 +122,7 @@ static inline int mca_btl_ugni_check_endpoint_state (mca_btl_base_endpoint_t *ep
|
||||
rc = OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
OPAL_THREAD_UNLOCK(&ep->lock);
|
||||
opal_mutex_unlock (&ep->lock);
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -120,10 +130,13 @@ static inline int mca_btl_ugni_check_endpoint_state (mca_btl_base_endpoint_t *ep
|
||||
static inline int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module) {
|
||||
gni_return_t rc;
|
||||
|
||||
BTL_VERBOSE(("posting wildcard datagram"));
|
||||
|
||||
memset (&ugni_module->wc_local_attr, 0, sizeof (ugni_module->wc_local_attr));
|
||||
rc = GNI_EpPostDataWId (ugni_module->wildcard_ep, &ugni_module->wc_local_attr, sizeof (ugni_module->wc_local_attr),
|
||||
&ugni_module->wc_remote_attr, sizeof (ugni_module->wc_remote_attr),
|
||||
MCA_BTL_UGNI_CONNECT_WILDCARD_ID | OMPI_PROC_MY_NAME->vpid);
|
||||
memset (&ugni_module->wc_remote_attr, 0, sizeof (ugni_module->wc_remote_attr));
|
||||
rc = GNI_EpPostDataWId (ugni_module->wildcard_ep, &ugni_module->wc_local_attr,
|
||||
sizeof (ugni_module->wc_local_attr), &ugni_module->wc_remote_attr,
|
||||
sizeof (ugni_module->wc_remote_attr), MCA_BTL_UGNI_CONNECT_WILDCARD_ID);
|
||||
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
@ -118,7 +118,7 @@ static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag)
|
||||
{
|
||||
if (frag->registration) {
|
||||
frag->endpoint->btl->super.btl_mpool->mpool_deregister(frag->endpoint->btl->super.btl_mpool,
|
||||
&frag->registration->base);
|
||||
(mca_mpool_base_registration_t *) frag->registration);
|
||||
frag->registration = NULL;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -16,6 +16,7 @@
|
||||
#include "btl_ugni_frag.h"
|
||||
#include "btl_ugni_endpoint.h"
|
||||
#include "btl_ugni_prepare.h"
|
||||
#include "btl_ugni_smsg.h"
|
||||
|
||||
static int
|
||||
mca_btl_ugni_free (struct mca_btl_base_module_t *btl,
|
||||
@ -40,39 +41,18 @@ mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
|
||||
uint32_t flags);
|
||||
|
||||
mca_btl_ugni_module_t mca_btl_ugni_module = {
|
||||
{
|
||||
/* .btl_component = */ &mca_btl_ugni_component.super,
|
||||
|
||||
/* these are set in component_register */
|
||||
/* .btl_eager_limit = */ 0,
|
||||
/* .btl_rndv_eager_limit = */ 0,
|
||||
/* .btl_max_send_size = */ 0,
|
||||
/* .btl_rdma_pipeline_send_length = */ 0,
|
||||
/* .btl_rdma_pipeline_frag_size = */ 0,
|
||||
/* .btl_min_rdma_pipeline_size = */ 0,
|
||||
/* .btl_exclusivity = */ 0,
|
||||
/* .btl_latency = */ 0,
|
||||
/* .btl_bandwidth = */ 0,
|
||||
/* .btl_flags = */ 0,
|
||||
/* .btl_seg_size = */ 0,
|
||||
|
||||
/* member functions */
|
||||
mca_btl_ugni_add_procs,
|
||||
mca_btl_ugni_del_procs,
|
||||
NULL, /* register */
|
||||
mca_btl_ugni_module_finalize,
|
||||
mca_btl_ugni_alloc,
|
||||
mca_btl_ugni_free,
|
||||
mca_btl_ugni_prepare_src,
|
||||
mca_btl_ugni_prepare_dst,
|
||||
mca_btl_ugni_send,
|
||||
NULL, /* sendi */
|
||||
mca_btl_ugni_put,
|
||||
mca_btl_ugni_get,
|
||||
NULL, /* mca_btl_base_dump, */
|
||||
NULL, /* mpool */
|
||||
NULL, /* mca_btl_ugni_register_error_cb - error callback registration */
|
||||
NULL, /* mca_btl_ugni_ft_event */
|
||||
.super = {
|
||||
.btl_component = &mca_btl_ugni_component.super,
|
||||
.btl_add_procs = mca_btl_ugni_add_procs,
|
||||
.btl_del_procs = mca_btl_ugni_del_procs,
|
||||
.btl_finalize = mca_btl_ugni_module_finalize,
|
||||
.btl_alloc = mca_btl_ugni_alloc,
|
||||
.btl_free = mca_btl_ugni_free,
|
||||
.btl_prepare_src = mca_btl_ugni_prepare_src,
|
||||
.btl_prepare_dst = mca_btl_ugni_prepare_dst,
|
||||
.btl_send = mca_btl_ugni_send,
|
||||
.btl_put = mca_btl_ugni_put,
|
||||
.btl_get = mca_btl_ugni_get,
|
||||
}
|
||||
};
|
||||
|
||||
@ -88,6 +68,10 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
|
||||
/* copy module defaults (and function pointers) */
|
||||
memmove (ugni_module, &mca_btl_ugni_module, sizeof (mca_btl_ugni_module));
|
||||
|
||||
ugni_module->initialized = false;
|
||||
ugni_module->nlocal_procs = 0;
|
||||
ugni_module->active_send_count = 0;
|
||||
|
||||
OBJ_CONSTRUCT(&ugni_module->failed_frags, opal_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->eager_frags_send, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->eager_frags_recv, ompi_free_list_t);
|
||||
@ -96,9 +80,10 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
|
||||
OBJ_CONSTRUCT(&ugni_module->rdma_int_frags, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);
|
||||
|
||||
ugni_module->device = dev;
|
||||
ugni_module->endpoints = NULL;
|
||||
dev->btl_ctx = (void *) ugni_module;
|
||||
|
||||
/* create wildcard endpoint to listen for connections.
|
||||
@ -124,9 +109,19 @@ static int
|
||||
mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *)btl;
|
||||
unsigned int i;
|
||||
mca_btl_base_endpoint_t *ep;
|
||||
uint64_t key;
|
||||
void *node;
|
||||
int rc;
|
||||
|
||||
while (ugni_module->active_send_count) {
|
||||
/* ensure all sends are complete before closing the module */
|
||||
rc = mca_btl_ugni_progress_local_smsg (ugni_module);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&ugni_module->eager_frags_send);
|
||||
OBJ_DESTRUCT(&ugni_module->eager_frags_recv);
|
||||
OBJ_DESTRUCT(&ugni_module->smsg_frags);
|
||||
@ -135,19 +130,14 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
|
||||
OBJ_DESTRUCT(&ugni_module->ep_wait_list);
|
||||
|
||||
/* close all open connections and release endpoints */
|
||||
if (NULL != ugni_module->endpoints) {
|
||||
for (i = 0 ; i < ugni_module->endpoint_count ; ++i) {
|
||||
if (ugni_module->endpoints[i]) {
|
||||
mca_btl_ugni_release_ep (ugni_module->endpoints[i]);
|
||||
if (ugni_module->initialized) {
|
||||
rc = opal_hash_table_get_first_key_uint64 (&ugni_module->id_to_endpoint, &key, (void **) &ep, &node);
|
||||
while (OPAL_SUCCESS == rc) {
|
||||
if (NULL != ep) {
|
||||
mca_btl_ugni_release_ep (ep);
|
||||
}
|
||||
|
||||
ugni_module->endpoints[i] = NULL;
|
||||
}
|
||||
|
||||
free (ugni_module->endpoints);
|
||||
|
||||
ugni_module->endpoint_count = 0;
|
||||
ugni_module->endpoints = NULL;
|
||||
rc = opal_hash_table_get_next_key_uint64 (&ugni_module->id_to_endpoint, &key, (void **) &ep, node, &node);
|
||||
}
|
||||
|
||||
/* destroy all cqs */
|
||||
@ -180,16 +170,24 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
|
||||
BTL_VERBOSE(("btl/ugni error destroying endpoint"));
|
||||
}
|
||||
|
||||
if (NULL != ugni_module->smsg_mpool) {
|
||||
(void) mca_mpool_base_module_destroy (ugni_module->smsg_mpool);
|
||||
ugni_module->smsg_mpool = NULL;
|
||||
}
|
||||
|
||||
if (NULL != ugni_module->super.btl_mpool) {
|
||||
(void) mca_mpool_base_module_destroy (ugni_module->super.btl_mpool);
|
||||
ugni_module->super.btl_mpool = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
|
||||
|
||||
OBJ_DESTRUCT(&ugni_module->id_to_endpoint);
|
||||
OBJ_DESTRUCT(&ugni_module->endpoints);
|
||||
OBJ_DESTRUCT(&ugni_module->failed_frags);
|
||||
|
||||
ugni_module->initialized = false;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -25,9 +25,6 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
int flags_save = frag->base.des_flags;
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
|
||||
OMPI_PROC_MY_NAME->vpid, endpoint->common->ep_rem_id, frag->segments[0].base.seg_len));
|
||||
|
||||
/* tag and len are at the same location in eager and smsg frag hdrs */
|
||||
frag->hdr.send.lag = (tag << 24) | size;
|
||||
frag->endpoint = endpoint;
|
||||
@ -39,6 +36,9 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
|
||||
OMPI_PROC_MY_NAME->vpid, endpoint->common->ep_rem_id, frag->segments[0].base.seg_len));
|
||||
|
||||
/* temporarily disable ownership and callback flags so we can reliably check the complete flag */
|
||||
frag->base.des_flags &= ~(MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
|
||||
frag->flags &= ~MCA_BTL_UGNI_FRAG_COMPLETE;
|
||||
|
@ -14,17 +14,21 @@
|
||||
#include "btl_ugni_rdma.h"
|
||||
|
||||
static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
|
||||
struct mca_btl_ugni_reg_t *reg =
|
||||
struct mca_btl_ugni_reg_t *ugni_reg =
|
||||
(struct mca_btl_ugni_reg_t *) mbox->super.registration;
|
||||
struct mca_mpool_base_registration_t *base_reg =
|
||||
(struct mca_mpool_base_registration_t *) ugni_reg;
|
||||
|
||||
/* initialize mailbox attributes */
|
||||
mbox->smsg_attrib.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
|
||||
mbox->smsg_attrib.msg_maxsize = mca_btl_ugni_component.ugni_smsg_limit;
|
||||
mbox->smsg_attrib.mbox_maxcredit = mca_btl_ugni_component.smsg_max_credits;
|
||||
mbox->smsg_attrib.mbox_offset = (uintptr_t) mbox->super.ptr - (uintptr_t) reg->base.alloc_base;
|
||||
mbox->smsg_attrib.msg_buffer = reg->base.alloc_base;
|
||||
mbox->smsg_attrib.buff_size = mca_btl_ugni_component.smsg_mbox_size;
|
||||
mbox->smsg_attrib.mem_hndl = reg->memory_hdl;
|
||||
mbox->attr.smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
|
||||
mbox->attr.smsg_attr.msg_maxsize = mca_btl_ugni_component.ugni_smsg_limit;
|
||||
mbox->attr.smsg_attr.mbox_maxcredit = mca_btl_ugni_component.smsg_max_credits;
|
||||
mbox->attr.smsg_attr.mbox_offset = (uintptr_t) mbox->super.ptr - (uintptr_t) base_reg->base;
|
||||
mbox->attr.smsg_attr.msg_buffer = base_reg->base;
|
||||
mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size;
|
||||
mbox->attr.smsg_attr.mem_hndl = ugni_reg->memory_hdl;
|
||||
|
||||
mbox->attr.proc_id = mca_btl_ugni_proc_name_to_id (*OMPI_PROC_MY_NAME);
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_ugni_smsg_mbox_t, ompi_free_list_item_t,
|
||||
@ -146,8 +150,12 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
|
||||
ep->smsg_progressing = false;
|
||||
|
||||
/* disconnect if we get here */
|
||||
opal_mutex_lock (&ep->lock);
|
||||
|
||||
mca_btl_ugni_ep_disconnect (ep, false);
|
||||
|
||||
opal_mutex_unlock (&ep->lock);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
@ -155,6 +163,7 @@ static inline int
|
||||
mca_btl_ugni_handle_remote_smsg_overrun (mca_btl_ugni_module_t *btl)
|
||||
{
|
||||
gni_cq_entry_t event_data;
|
||||
size_t endpoint_count;
|
||||
unsigned int ep_index;
|
||||
int count, rc;
|
||||
|
||||
@ -169,8 +178,13 @@ mca_btl_ugni_handle_remote_smsg_overrun (mca_btl_ugni_module_t *btl)
|
||||
rc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
|
||||
} while (GNI_RC_NOT_DONE != rc);
|
||||
|
||||
for (ep_index = 0, count = 0 ; ep_index < btl->endpoint_count ; ++ep_index) {
|
||||
mca_btl_base_endpoint_t *ep = btl->endpoints[ep_index];
|
||||
endpoint_count = opal_pointer_array_get_size (&btl->endpoints);
|
||||
|
||||
for (ep_index = 0, count = 0 ; ep_index < endpoint_count ; ++ep_index) {
|
||||
mca_btl_base_endpoint_t *ep;
|
||||
|
||||
ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&btl->endpoints,
|
||||
ep_index);
|
||||
|
||||
if (NULL == ep || MCA_BTL_UGNI_EP_STATE_CONNECTED != ep->state) {
|
||||
continue;
|
||||
@ -220,7 +234,8 @@ int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl)
|
||||
|
||||
inst_id = GNI_CQ_GET_INST_ID(event_data);
|
||||
|
||||
ep = btl->endpoints[inst_id & 0xffffffff];
|
||||
ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&btl->endpoints, inst_id);
|
||||
|
||||
if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_CONNECTED != ep->state)) {
|
||||
/* due to the nature of datagrams we may get a smsg completion before
|
||||
we get mailbox info from the peer */
|
||||
|
@ -27,7 +27,7 @@ typedef enum {
|
||||
|
||||
typedef struct mca_btl_ugni_smsg_mbox_t {
|
||||
ompi_free_list_item_t super;
|
||||
gni_smsg_attr_t smsg_attrib;
|
||||
mca_btl_ugni_endpoint_attr_t attr;
|
||||
} mca_btl_ugni_smsg_mbox_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_smsg_mbox_t);
|
||||
@ -42,6 +42,11 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
|
||||
gni_cq_entry_t event_data;
|
||||
gni_return_t grc;
|
||||
|
||||
/* nothing to do */
|
||||
if (0 == ugni_module->active_send_count) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
grc = GNI_CqGetEvent (ugni_module->smsg_local_cq, &event_data);
|
||||
if (GNI_RC_NOT_DONE == grc) {
|
||||
return OMPI_SUCCESS;
|
||||
@ -64,6 +69,8 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
ugni_module->active_send_count--;
|
||||
|
||||
frag->flags |= MCA_BTL_UGNI_FRAG_SMSG_COMPLETE;
|
||||
|
||||
if (!(frag->flags & MCA_BTL_UGNI_FRAG_IGNORE)) {
|
||||
@ -82,9 +89,11 @@ static inline int ompi_mca_btl_ugni_smsg_send (mca_btl_ugni_base_frag_t *frag,
|
||||
grc = GNI_SmsgSendWTag (frag->endpoint->smsg_ep_handle, hdr, hdr_len,
|
||||
payload, payload_len, frag->msg_id, tag);
|
||||
|
||||
(void) mca_btl_ugni_progress_local_smsg ((mca_btl_ugni_module_t *) frag->endpoint->btl);
|
||||
|
||||
if (OPAL_LIKELY(GNI_RC_SUCCESS == grc)) {
|
||||
/* increment the active send counter */
|
||||
frag->endpoint->btl->active_send_count++;
|
||||
|
||||
(void) mca_btl_ugni_progress_local_smsg ((mca_btl_ugni_module_t *) frag->endpoint->btl);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -509,13 +509,13 @@ static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *
|
||||
|
||||
static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void *tgt, MPI_Comm comm) {
|
||||
int size, pos, res;
|
||||
MPI_Aint ext;
|
||||
OPAL_PTRDIFF_TYPE ext, lb;
|
||||
|
||||
if(NBC_Type_intrinsic(srctype)) {
|
||||
/* if we have the same types and they are contiguous (intrinsic
|
||||
* types are contiguous), we can just use a single memcpy */
|
||||
res = MPI_Type_extent(srctype, &ext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
res = ompi_datatype_get_extent (srctype, &lb, &ext);
|
||||
if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
memcpy(tgt, src, srccount * ext);
|
||||
|
||||
} else {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -14,6 +14,7 @@
|
||||
#include "common_ugni.h"
|
||||
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "opal/mca/db/db.h"
|
||||
|
||||
/* NTH: we need some options from the btl */
|
||||
#include "ompi/mca/btl/ugni/btl_ugni.h"
|
||||
@ -122,7 +123,7 @@ ompi_common_ugni_get_nic_address(int device_id)
|
||||
}
|
||||
|
||||
static int ompi_common_ugni_device_init (ompi_common_ugni_device_t *device,
|
||||
int comm_world_size, int device_id)
|
||||
int device_id)
|
||||
{
|
||||
int rc;
|
||||
|
||||
@ -141,22 +142,11 @@ static int ompi_common_ugni_device_init (ompi_common_ugni_device_t *device,
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
||||
device->dev_eps = calloc (comm_world_size, sizeof (ompi_common_ugni_endpoint_t *));
|
||||
if (NULL == device->dev_eps) {
|
||||
OPAL_OUTPUT((0, "Error allocating space for endpoint pointers"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Release the per-device endpoint pointer array.
 *
 * @param dev  device whose dev_eps array is freed and reset to NULL
 *
 * @return OMPI_SUCCESS always
 */
static int ompi_common_ugni_device_fini (ompi_common_ugni_device_t *dev)
{
    /* free(NULL) is a no-op, so the array can be released unconditionally */
    free (dev->dev_eps);
    dev->dev_eps = NULL;

    return OMPI_SUCCESS;
}
|
||||
|
||||
@ -236,9 +226,9 @@ int ompi_common_ugni_fini (void)
|
||||
|
||||
int ompi_common_ugni_init (void)
|
||||
{
|
||||
int modes, rc, my_rank, i;
|
||||
size_t comm_world_size;
|
||||
ompi_proc_t *my_proc;
|
||||
int modes, rc, i;
|
||||
uint32_t my_rank, *ptr;
|
||||
|
||||
ompi_common_ugni_module_ref_count ++;
|
||||
|
||||
@ -247,14 +237,23 @@ int ompi_common_ugni_init (void)
|
||||
}
|
||||
|
||||
my_proc = ompi_proc_local ();
|
||||
|
||||
/* get a unique id from the runtime */
|
||||
#if defined(OMPI_DB_GLOBAL_RANK)
|
||||
ptr = &my_rank;
|
||||
rc = opal_db.fetch ((opal_identifier_t *) &my_proc->proc_name, OMPI_DB_GLOBAL_RANK,
|
||||
(void **) &ptr, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
my_rank = my_proc->proc_name.vpid;
|
||||
}
|
||||
#else
|
||||
my_rank = my_proc->proc_name.vpid;
|
||||
#endif
|
||||
|
||||
/* pull settings from ugni btl */
|
||||
ompi_common_ugni_module.rdma_max_retries =
|
||||
mca_btl_ugni_component.rdma_max_retries;
|
||||
|
||||
(void) ompi_proc_world (&comm_world_size);
|
||||
|
||||
/* Create a communication domain */
|
||||
modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED |
|
||||
GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL;
|
||||
@ -285,8 +284,7 @@ int ompi_common_ugni_init (void)
|
||||
sizeof (ompi_common_ugni_device_t));
|
||||
|
||||
for (i = 0 ; i < ompi_common_ugni_module.device_count ; ++i) {
|
||||
rc = ompi_common_ugni_device_init (ompi_common_ugni_module.devices + i,
|
||||
comm_world_size, i);
|
||||
rc = ompi_common_ugni_device_init (ompi_common_ugni_module.devices + i, i);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
OPAL_OUTPUT((-1, "error initializing uGNI device"));
|
||||
return rc;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -51,7 +51,6 @@ struct ompi_common_ugni_device_t {
|
||||
uint32_t dev_cpu_id;
|
||||
|
||||
size_t dev_ep_count;
|
||||
ompi_common_ugni_endpoint_t **dev_eps;
|
||||
void *btl_ctx;
|
||||
};
|
||||
typedef struct ompi_common_ugni_device_t ompi_common_ugni_device_t;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -12,13 +12,7 @@
|
||||
|
||||
#include "common_ugni.h"
|
||||
|
||||
static void ompi_common_ugni_ep_destruct (ompi_common_ugni_endpoint_t *ep)
|
||||
{
|
||||
ep->dev->dev_eps[ep->ep_rem_id] = NULL;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(ompi_common_ugni_endpoint_t, opal_object_t,
|
||||
NULL, ompi_common_ugni_ep_destruct);
|
||||
OBJ_CLASS_INSTANCE(ompi_common_ugni_endpoint_t, opal_object_t, NULL, NULL);
|
||||
|
||||
int ompi_common_ugni_endpoint_for_proc (ompi_common_ugni_device_t *dev, ompi_proc_t *peer_proc,
|
||||
ompi_common_ugni_endpoint_t **ep)
|
||||
@ -26,13 +20,10 @@ int ompi_common_ugni_endpoint_for_proc (ompi_common_ugni_device_t *dev, ompi_pro
|
||||
ompi_common_ugni_endpoint_t *endpoint;
|
||||
ompi_common_ugni_modex_t *modex;
|
||||
size_t msg_size;
|
||||
int rem_id, rc;
|
||||
int rc;
|
||||
|
||||
assert (NULL != dev && NULL != ep && peer_proc);
|
||||
|
||||
rem_id = peer_proc->proc_name.vpid;;
|
||||
|
||||
if (NULL == dev->dev_eps[rem_id]) {
|
||||
endpoint = OBJ_NEW(ompi_common_ugni_endpoint_t);
|
||||
if (OPAL_UNLIKELY(NULL == endpoint)) {
|
||||
assert (0);
|
||||
@ -40,16 +31,13 @@ int ompi_common_ugni_endpoint_for_proc (ompi_common_ugni_device_t *dev, ompi_pro
|
||||
}
|
||||
|
||||
/* Receive the modex */
|
||||
rc = ompi_modex_recv(&ompi_common_ugni_component,
|
||||
peer_proc, (void *)&modex, &msg_size);
|
||||
rc = ompi_modex_recv(&ompi_common_ugni_component, peer_proc,
|
||||
(void *) &modex, &msg_size);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
OPAL_OUTPUT((-1, "btl/ugni error receiving modex"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* these should be the same */
|
||||
assert (rem_id == modex->id);
|
||||
|
||||
endpoint->ep_rem_addr = modex->addr;
|
||||
endpoint->ep_rem_id = modex->id;
|
||||
|
||||
@ -57,12 +45,6 @@ int ompi_common_ugni_endpoint_for_proc (ompi_common_ugni_device_t *dev, ompi_pro
|
||||
|
||||
*ep = endpoint;
|
||||
|
||||
dev->dev_eps[rem_id] = endpoint;
|
||||
} else {
|
||||
OBJ_RETAIN(dev->dev_eps[rem_id]);
|
||||
*ep = dev->dev_eps[rem_id];
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -73,7 +55,8 @@ void ompi_common_ugni_endpoint_return (ompi_common_ugni_endpoint_t *ep)
|
||||
OBJ_RELEASE(ep);
|
||||
}
|
||||
|
||||
int ompi_common_ugni_ep_create (ompi_common_ugni_endpoint_t *cep, gni_cq_handle_t cq, gni_ep_handle_t *ep_handle)
|
||||
int ompi_common_ugni_ep_create (ompi_common_ugni_endpoint_t *cep, gni_cq_handle_t cq,
|
||||
gni_ep_handle_t *ep_handle)
|
||||
{
|
||||
gni_return_t grc;
|
||||
|
||||
@ -90,6 +73,7 @@ int ompi_common_ugni_ep_create (ompi_common_ugni_endpoint_t *cep, gni_cq_handle_
|
||||
|
||||
grc = GNI_EpBind (*ep_handle, cep->ep_rem_addr, cep->ep_rem_id);
|
||||
if (GNI_RC_SUCCESS != grc) {
|
||||
GNI_EpDestroy (*ep_handle);
|
||||
return ompi_common_rc_ugni_to_ompi (grc);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
|
@ -106,8 +106,10 @@ mca_mpool_base_module_t* mca_mpool_base_module_create(
|
||||
leave_pinned_pipeline is enabled (note that either of these
|
||||
leave_pinned variables may have been set by a user MCA
|
||||
param or elsewhere in the code base). Yes, we could have
|
||||
coded this more succinctly, but this is more clear. */
|
||||
if (ompi_mpi_leave_pinned > 0 || ompi_mpi_leave_pinned_pipeline) {
|
||||
coded this more succinctly, but this is more clear. Do not
|
||||
check memory hooks if the mpool explicitly asked us not to. */
|
||||
if ((ompi_mpi_leave_pinned > 0 || ompi_mpi_leave_pinned_pipeline) &&
|
||||
!(module->flags & MCA_MPOOL_FLAGS_NO_HOOKS)) {
|
||||
use_mem_hooks = 1;
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/**
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -32,14 +32,20 @@
|
||||
#include "opal/mca/crs/crs.h"
|
||||
#include "opal/mca/crs/base/base.h"
|
||||
|
||||
#define MCA_MPOOL_FLAGS_CACHE_BYPASS 0x1
|
||||
#define MCA_MPOOL_FLAGS_PERSIST 0x2
|
||||
#define MCA_MPOOL_FLAGS_MPI_ALLOC_MEM 0x4
|
||||
#define MCA_MPOOL_FLAGS_INVALID 0x8
|
||||
#define MCA_MPOOL_FLAGS_CACHE_BYPASS 0x01
|
||||
#define MCA_MPOOL_FLAGS_PERSIST 0x02
|
||||
#define MCA_MPOOL_FLAGS_MPI_ALLOC_MEM 0x04
|
||||
#define MCA_MPOOL_FLAGS_INVALID 0x08
|
||||
#define MCA_MPOOL_FLAGS_SO_MEM 0x10
|
||||
#define MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM 0x20
|
||||
|
||||
#define MCA_MPOOL_FLAGS_CUDA_GPU_MEM 0x40
|
||||
|
||||
/* Only valid in mpool flags. Used to indicate that no external memory
|
||||
* hooks (ptmalloc2, etc) are required. */
|
||||
#define MCA_MPOOL_FLAGS_NO_HOOKS 0x80
|
||||
|
||||
|
||||
struct mca_mpool_base_resources_t;
|
||||
|
||||
struct mca_mpool_base_registration_t {
|
||||
@ -50,6 +56,7 @@ struct mca_mpool_base_registration_t {
|
||||
unsigned char* alloc_base;
|
||||
int32_t ref_count;
|
||||
uint32_t flags;
|
||||
void *mpool_context;
|
||||
#if OPAL_CUDA_SUPPORT_60
|
||||
unsigned long long gpu_bufID;
|
||||
#endif /* OPAL_CUDA_SUPPORT_60 */
|
||||
|
52
ompi/mca/mpool/udreg/Makefile.am
Обычный файл
52
ompi/mca/mpool/udreg/Makefile.am
Обычный файл
@ -0,0 +1,52 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CPPFLAGS = $(mpool_udreg_CPPFLAGS)
|
||||
|
||||
sources = mpool_udreg_module.c mpool_udreg_component.c
|
||||
|
||||
if WANT_INSTALL_HEADERS
|
||||
ompidir = $(includedir)/openmpi/$(subdir)
|
||||
ompi_HEADERS = mpool_udreg.h
|
||||
endif
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_ompi_mpool_udreg_DSO
|
||||
component_noinst =
|
||||
component_install = mca_mpool_udreg.la
|
||||
else
|
||||
component_noinst = libmca_mpool_udreg.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_mpool_udreg_la_SOURCES = $(sources)
|
||||
mca_mpool_udreg_la_LDFLAGS = -module -avoid-version $(mpool_udreg_LDFLAGS)
|
||||
mca_mpool_udreg_la_LIBADD = $(mpool_udreg_LIBS)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_mpool_udreg_la_SOURCES = $(sources)
|
||||
libmca_mpool_udreg_la_LIBADD = $(mpool_udreg_LIBS)
|
||||
libmca_mpool_udreg_la_LDFLAGS = -module -avoid-version $(mpool_udreg_LDFLAGS)
|
51
ompi/mca/mpool/udreg/configure.m4
Обычный файл
51
ompi/mca/mpool/udreg/configure.m4
Обычный файл
@ -0,0 +1,51 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2006 QLogic Corp. All rights reserved.
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AC_DEFUN([MCA_ompi_mpool_udreg_CONFIG],[
|
||||
AC_CONFIG_FILES([ompi/mca/mpool/udreg/Makefile])
|
||||
|
||||
AC_ARG_WITH([udreg], [AC_HELP_STRING([--with-udreg(=DIR)],
|
||||
[Build support for Cray udreg support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])])
|
||||
OMPI_CHECK_WITHDIR([udreg], [$with_udreg], [.])
|
||||
|
||||
mpool_udreg_happy="no"
|
||||
|
||||
if test "$with_udreg" != "no" ; then
|
||||
if test -n "$with_udreg" -a "$with_udreg" != "yes" ; then
|
||||
ompi_check_udreg_dir="$with_udreg"
|
||||
else
|
||||
ompi_check_udreg_dir=""
|
||||
fi
|
||||
|
||||
OMPI_CHECK_PACKAGE([mpool_udreg], [udreg_pub.h], [udreg], [UDREG_CacheCreate],
|
||||
[], [$ompi_check_udreg_dir], ["$ompi_check_udreg_dir/lib64"],
|
||||
[mpool_udreg_happy="yes"], [mpool_udreg_happy="no"])
|
||||
fi
|
||||
|
||||
AS_IF([test "$mpool_udreg_happy" = "yes"], [$1], [$2])
|
||||
|
||||
# substitute in the things needed to build udreg
|
||||
AC_SUBST([mpool_udreg_CPPFLAGS])
|
||||
AC_SUBST([mpool_udreg_LDFLAGS])
|
||||
AC_SUBST([mpool_udreg_LIBS])
|
||||
])dnl
|
169
ompi/mca/mpool/udreg/mpool_udreg.h
Обычный файл
169
ompi/mca/mpool/udreg/mpool_udreg.h
Обычный файл
@ -0,0 +1,169 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef MCA_MPOOL_UDREG_H
|
||||
#define MCA_MPOOL_UDREG_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#if HAVE_SYS_MMAN_H
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct mca_mpool_udreg_component_t {
|
||||
mca_mpool_base_component_t super;
|
||||
bool print_stats;
|
||||
int leave_pinned;
|
||||
opal_list_t huge_pages;
|
||||
bool use_huge_pages;
|
||||
};
|
||||
typedef struct mca_mpool_udreg_component_t mca_mpool_udreg_component_t;
|
||||
|
||||
OMPI_DECLSPEC extern mca_mpool_udreg_component_t mca_mpool_udreg_component;
|
||||
|
||||
struct mca_mpool_udreg_module_t;
|
||||
|
||||
struct mca_mpool_base_resources_t {
|
||||
/* the start of this mpool should match grdma */
|
||||
char *pool_name;
|
||||
void *reg_data;
|
||||
size_t sizeof_reg;
|
||||
int (*register_mem)(void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg);
|
||||
|
||||
/* udreg specific resources */
|
||||
bool use_kernel_cache;
|
||||
bool use_evict_w_unreg;
|
||||
int max_entries;
|
||||
size_t page_size;
|
||||
};
|
||||
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
|
||||
|
||||
struct mca_mpool_udreg_hugepage_t {
|
||||
opal_list_item_t super;
|
||||
unsigned long page_size;
|
||||
char *path;
|
||||
opal_list_t allocations;
|
||||
int cnt;
|
||||
};
|
||||
typedef struct mca_mpool_udreg_hugepage_t mca_mpool_udreg_hugepage_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_mpool_udreg_hugepage_t);
|
||||
|
||||
struct mca_mpool_udreg_hugepage_alloc_t {
|
||||
opal_list_item_t super;
|
||||
int fd;
|
||||
char *path;
|
||||
void *ptr;
|
||||
size_t size;
|
||||
mca_mpool_udreg_hugepage_t *huge_table;
|
||||
};
|
||||
typedef struct mca_mpool_udreg_hugepage_alloc_t mca_mpool_udreg_hugepage_alloc_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_mpool_udreg_hugepage_pool_item_t);
|
||||
|
||||
struct mca_mpool_udreg_module_t {
|
||||
mca_mpool_base_module_t super;
|
||||
struct mca_mpool_base_resources_t resources;
|
||||
ompi_free_list_t reg_list;
|
||||
mca_mpool_udreg_hugepage_t *huge_page;
|
||||
void *udreg_handle;
|
||||
};
|
||||
typedef struct mca_mpool_udreg_module_t mca_mpool_udreg_module_t;
|
||||
|
||||
|
||||
/*
|
||||
* Initializes the mpool module.
|
||||
*/
|
||||
int mca_mpool_udreg_module_init(mca_mpool_udreg_module_t *mpool);
|
||||
|
||||
/*
|
||||
* Returns base address of shared memory mapping.
|
||||
*/
|
||||
void *mca_mpool_udreg_base(mca_mpool_base_module_t *mpool);
|
||||
|
||||
/**
|
||||
* Allocate block of registered memory.
|
||||
*/
|
||||
void* mca_mpool_udreg_alloc(mca_mpool_base_module_t *mpool, size_t size,
|
||||
size_t align, uint32_t flags,
|
||||
mca_mpool_base_registration_t** registration);
|
||||
|
||||
/**
|
||||
* realloc block of registered memory
|
||||
*/
|
||||
void* mca_mpool_udreg_realloc( mca_mpool_base_module_t *mpool, void* addr,
|
||||
size_t size, mca_mpool_base_registration_t** registration);
|
||||
|
||||
/**
|
||||
* register block of memory
|
||||
*/
|
||||
int mca_mpool_udreg_register(mca_mpool_base_module_t* mpool, void *addr,
|
||||
size_t size, uint32_t flags, mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* deregister memory
|
||||
*/
|
||||
int mca_mpool_udreg_deregister(mca_mpool_base_module_t *mpool,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
|
||||
/**
|
||||
* free memory allocated by alloc function
|
||||
*/
|
||||
void mca_mpool_udreg_free(mca_mpool_base_module_t *mpool, void * addr,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
|
||||
/**
|
||||
* find registration for a given block of memory
|
||||
*/
|
||||
int mca_mpool_udreg_find(struct mca_mpool_base_module_t* mpool, void* addr,
|
||||
size_t size, mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* finalize mpool
|
||||
*/
|
||||
void mca_mpool_udreg_finalize(struct mca_mpool_base_module_t *mpool);
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param state Checkpoint State
|
||||
* @return OMPI_SUCCESS or failure status
|
||||
*/
|
||||
int mca_mpool_udreg_ft_event(int state);
|
||||
|
||||
/**
|
||||
* evict one unused registration from the mpool's lru.
|
||||
* @return true on success, false on failure
|
||||
*/
|
||||
bool mca_mpool_udreg_evict (struct mca_mpool_base_module_t *mpool);
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
209
ompi/mca/mpool/udreg/mpool_udreg_component.c
Обычный файл
209
ompi/mca/mpool/udreg/mpool_udreg_component.c
Обычный файл
@ -0,0 +1,209 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
|
||||
#include "ompi_config.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
#include "mpool_udreg.h"
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#include <fcntl.h>
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static int udreg_open(void);
|
||||
static int udreg_close(void);
|
||||
static int udreg_register(void);
|
||||
static mca_mpool_base_module_t* udreg_init(
|
||||
struct mca_mpool_base_resources_t* resources);
|
||||
|
||||
mca_mpool_udreg_component_t mca_mpool_udreg_component = {
|
||||
{
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
{
|
||||
MCA_MPOOL_BASE_VERSION_2_0_0,
|
||||
|
||||
"udreg", /* MCA component name */
|
||||
OMPI_MAJOR_VERSION, /* MCA component major version */
|
||||
OMPI_MINOR_VERSION, /* MCA component minor version */
|
||||
OMPI_RELEASE_VERSION, /* MCA component release version */
|
||||
udreg_open, /* component open */
|
||||
udreg_close,
|
||||
NULL,
|
||||
udreg_register
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
|
||||
udreg_init
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* component open/close/init function
|
||||
*/
|
||||
static int udreg_open(void)
|
||||
{
|
||||
OBJ_CONSTRUCT(&mca_mpool_udreg_component.huge_pages, opal_list_t);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int udreg_register(void)
|
||||
{
|
||||
mca_mpool_udreg_component.print_stats = false;
|
||||
(void) mca_base_component_var_register(&mca_mpool_udreg_component.super.mpool_version,
|
||||
"print_stats", "print pool usage statistics at the end of the run",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_mpool_udreg_component.print_stats);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int udreg_close(void)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
|
||||
while (NULL != (item = opal_list_remove_first (&mca_mpool_udreg_component.huge_pages))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&mca_mpool_udreg_component.huge_pages);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * List sort comparator ordering huge page pools by ascending page size.
 *
 * @param a  address of the first list item (a mca_mpool_udreg_hugepage_t)
 * @param b  address of the second list item (a mca_mpool_udreg_hugepage_t)
 * @return 1 if *a has the larger page size, -1 if it has the smaller one,
 *         0 if both are equal
 */
static int page_compare (opal_list_item_t **a,
                         opal_list_item_t **b) {
    const unsigned long lhs = ((mca_mpool_udreg_hugepage_t *) *a)->page_size;
    const unsigned long rhs = ((mca_mpool_udreg_hugepage_t *) *b)->page_size;

    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (lhs > rhs) - (lhs < rhs);
}
|
||||
|
||||
static void udreg_find_hugepages (void) {
|
||||
FILE *fh;
|
||||
char *path;
|
||||
char buffer[1024];
|
||||
char *ctx, *tok;
|
||||
|
||||
fh = fopen ("/proc/mounts", "r");
|
||||
if (NULL == fh) {
|
||||
return;
|
||||
}
|
||||
|
||||
while (fgets (buffer, 1024, fh)) {
|
||||
mca_mpool_udreg_hugepage_t *pool;
|
||||
|
||||
(void) strtok_r (buffer, " ", &ctx);
|
||||
path = strtok_r (NULL, " ", &ctx);
|
||||
tok = strtok_r (NULL, " ", &ctx);
|
||||
|
||||
if (0 != strcmp (tok, "hugetlbfs")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
pool = OBJ_NEW(mca_mpool_udreg_hugepage_t);
|
||||
if (NULL == pool) {
|
||||
break;
|
||||
}
|
||||
|
||||
pool->path = strdup (path);
|
||||
|
||||
tok = strtok_r (NULL, " ", &ctx);
|
||||
tok = strtok_r (tok, ",", &ctx);
|
||||
|
||||
do {
|
||||
if (0 == strncmp (tok, "pagesize", 8)) {
|
||||
break;
|
||||
}
|
||||
tok = strtok_r (NULL, ",", &ctx);
|
||||
} while (tok);
|
||||
sscanf (tok, "pagesize=%lu", &pool->page_size);
|
||||
|
||||
opal_list_append (&mca_mpool_udreg_component.huge_pages, &pool->super);
|
||||
}
|
||||
|
||||
fclose (fh);
|
||||
|
||||
opal_list_sort (&mca_mpool_udreg_component.huge_pages, page_compare);
|
||||
|
||||
mca_mpool_udreg_component.use_huge_pages =
|
||||
!!(opal_list_get_size (&mca_mpool_udreg_component.huge_pages));
|
||||
}
|
||||
|
||||
|
||||
|
||||
static mca_mpool_base_module_t *
|
||||
udreg_init(struct mca_mpool_base_resources_t *resources)
|
||||
{
|
||||
mca_mpool_udreg_module_t* mpool_module;
|
||||
static int inited = false;
|
||||
int rc;
|
||||
|
||||
/* Set this here (vs in component.c) because
|
||||
ompi_mpi_leave_pinned* may have been set after MCA params were
|
||||
read (e.g., by the openib btl) */
|
||||
mca_mpool_udreg_component.leave_pinned = (int)
|
||||
(1 == ompi_mpi_leave_pinned || ompi_mpi_leave_pinned_pipeline);
|
||||
|
||||
if (!inited) {
|
||||
inited = true;
|
||||
udreg_find_hugepages ();
|
||||
}
|
||||
|
||||
mpool_module =
|
||||
(mca_mpool_udreg_module_t *) malloc (sizeof (mca_mpool_udreg_module_t));
|
||||
|
||||
memmove (&mpool_module->resources, resources, sizeof (*resources));
|
||||
|
||||
rc = mca_mpool_udreg_module_init(mpool_module);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
free (mpool_module);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return &mpool_module->super;
|
||||
}
|
488
ompi/mca/mpool/udreg/mpool_udreg_module.c
Обычный файл
488
ompi/mca/mpool/udreg/mpool_udreg_module.c
Обычный файл
@ -0,0 +1,488 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
|
||||
#include "ompi_config.h"
|
||||
#include "opal/align.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "mpool_udreg.h"
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#ifdef HAVE_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
#include "opal/include/opal_stdint.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
|
||||
#include <udreg_pub.h>
|
||||
|
||||
#include <sys/mman.h>
|
||||
|
||||
static void *mca_mpool_udreg_reg_func (void *addr, uint64_t len, void *reg_context);
|
||||
static uint32_t mca_mpool_udreg_dereg_func (void *device_data, void *dreg_context);
|
||||
|
||||
/**
 * Constructor of mca_mpool_udreg_hugepage_t: zero every member that follows
 * the list item base, then construct the allocations list.
 */
static void mca_mpool_udreg_hugepage_constructor (mca_mpool_udreg_hugepage_t *huge_page)
{
    /* the base object is already constructed and must not be wiped */
    char *members = (char *) huge_page + sizeof (huge_page->super);
    size_t members_len = sizeof (*huge_page) - sizeof (huge_page->super);

    memset (members, 0, members_len);

    OBJ_CONSTRUCT(&huge_page->allocations, opal_list_t);
}
|
||||
|
||||
/**
 * Destructor of mca_mpool_udreg_hugepage_t: free the mount path, release all
 * allocations still attached to the pool and destruct the allocations list.
 */
static void mca_mpool_udreg_hugepage_destructor (mca_mpool_udreg_hugepage_t *huge_page)
{
    opal_list_t *allocs = &huge_page->allocations;
    opal_list_item_t *entry;

    /* free() accepts NULL, so no guard is needed for an unset path */
    free (huge_page->path);

    for (entry = opal_list_remove_first (allocs) ; NULL != entry ;
         entry = opal_list_remove_first (allocs)) {
        OBJ_RELEASE(entry);
    }

    OBJ_DESTRUCT(allocs);
}

OBJ_CLASS_INSTANCE(mca_mpool_udreg_hugepage_t, opal_list_item_t,
                   mca_mpool_udreg_hugepage_constructor,
                   mca_mpool_udreg_hugepage_destructor);
|
||||
|
||||
/**
 * Constructor of mca_mpool_udreg_hugepage_alloc_t: zero every member that
 * follows the list item base and mark the file descriptor as not open.
 */
static void mca_mpool_udreg_hugepage_alloc_constructor (mca_mpool_udreg_hugepage_alloc_t *alloc)
{
    /* the base object is already constructed and must not be wiped */
    char *members = (char *) alloc + sizeof (alloc->super);
    size_t members_len = sizeof (*alloc) - sizeof (alloc->super);

    memset (members, 0, members_len);

    /* 0 is a valid descriptor, so use -1 for "no file" */
    alloc->fd = -1;
}
|
||||
|
||||
/**
 * Destructor of mca_mpool_udreg_hugepage_alloc_t: unmap the allocation, if
 * one was mapped, and free the backing file path.
 */
static void mca_mpool_udreg_hugepage_alloc_destructor (mca_mpool_udreg_hugepage_alloc_t *alloc)
{
    if (NULL != alloc->ptr) {
        munmap (alloc->ptr, alloc->size);
    }

    /* free() accepts NULL, so an unset path needs no special case */
    free (alloc->path);
}

OBJ_CLASS_INSTANCE(mca_mpool_udreg_hugepage_alloc_t, opal_list_item_t,
                   mca_mpool_udreg_hugepage_alloc_constructor,
                   mca_mpool_udreg_hugepage_alloc_destructor);
|
||||
|
||||
|
||||
static mca_mpool_udreg_hugepage_t *udreg_find_matching_pagesize (size_t size) {
|
||||
mca_mpool_udreg_hugepage_t *huge_table;
|
||||
opal_list_item_t *item;
|
||||
|
||||
for (item = opal_list_get_first (&mca_mpool_udreg_component.huge_pages) ;
|
||||
item != opal_list_get_end (&mca_mpool_udreg_component.huge_pages) ;
|
||||
item = opal_list_get_next (item)) {
|
||||
huge_table = (mca_mpool_udreg_hugepage_t *) item;
|
||||
|
||||
if (huge_table->page_size == size) {
|
||||
return huge_table;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Initializes the mpool module.
|
||||
*/
|
||||
/**
 * Initialize a udreg mpool module whose resources member is already filled.
 *
 * Installs the module function table, selects a huge page pool when the
 * requested page size is larger than 4096, creates (or attaches to) the udreg
 * cache named after the pool and sets up the registration free list.
 *
 * @param mpool  module to initialize
 * @return OMPI_SUCCESS on success, OMPI_ERROR if the udreg cache cannot be
 *         accessed, or the error returned by the free list initialization
 */
int mca_mpool_udreg_module_init(mca_mpool_udreg_module_t* mpool)
{
    struct udreg_cache_attr cache_attr;
    int urc, rc;

    mpool->super.mpool_component = &mca_mpool_udreg_component.super;
    mpool->super.mpool_base = NULL; /* no base .. */
    mpool->super.mpool_alloc = mca_mpool_udreg_alloc;
    mpool->super.mpool_realloc = mca_mpool_udreg_realloc;
    mpool->super.mpool_free = mca_mpool_udreg_free;
    mpool->super.mpool_register = mca_mpool_udreg_register;
    mpool->super.mpool_find = mca_mpool_udreg_find;
    mpool->super.mpool_deregister = mca_mpool_udreg_deregister;
    /* This module relies on udreg for notification of memory release */
    mpool->super.mpool_release_memory = NULL;
    mpool->super.mpool_finalize = mca_mpool_udreg_finalize;
    mpool->super.mpool_ft_event = mca_mpool_udreg_ft_event;
    mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM | MCA_MPOOL_FLAGS_NO_HOOKS;

    if (4096 < mpool->resources.page_size) {
        mpool->huge_page = udreg_find_matching_pagesize (mpool->resources.page_size);
    } else {
        mpool->huge_page = NULL;
    }

    /* start from an all-zero attribute block so that no member reaches udreg
     * uninitialized (this also leaves modes at 0) */
    memset (&cache_attr, 0, sizeof (cache_attr));

    /* Create udreg cache */
    if (mpool->resources.use_kernel_cache) {
        cache_attr.modes |= UDREG_CC_MODE_USE_KERNEL_CACHE;
    }

    if (mpool->resources.use_evict_w_unreg) {
        cache_attr.modes |= UDREG_CC_MODE_USE_EVICT_W_UNREG;
    }

    if (mca_mpool_udreg_component.leave_pinned) {
        cache_attr.modes |= UDREG_CC_MODE_USE_LAZY_DEREG;
    }

    /* strncpy does not terminate the destination when the source is too
     * long: copy at most LEN - 1 bytes and terminate explicitly
     * (assumes cache_name holds UDREG_MAX_CACHENAME_LEN bytes -- TODO confirm) */
    strncpy (cache_attr.cache_name, mpool->resources.pool_name, UDREG_MAX_CACHENAME_LEN - 1);
    cache_attr.cache_name[UDREG_MAX_CACHENAME_LEN - 1] = '\0';
    cache_attr.max_entries = mpool->resources.max_entries;
    cache_attr.debug_mode = 0;
    cache_attr.debug_rank = 0;
    cache_attr.reg_context = mpool;
    cache_attr.dreg_context = mpool;
    cache_attr.destructor_context = mpool;
    cache_attr.device_reg_func = mca_mpool_udreg_reg_func;
    cache_attr.device_dereg_func = mca_mpool_udreg_dereg_func;
    cache_attr.destructor_callback = NULL;

    /* attempt to create the udreg cache. this will fail if one already exists */
    (void) UDREG_CacheCreate (&cache_attr);

    urc = UDREG_CacheAccess (mpool->resources.pool_name, (udreg_cache_handle_t *) &mpool->udreg_handle);
    if (UDREG_RC_SUCCESS != urc) {
        return OMPI_ERROR;
    }

    OBJ_CONSTRUCT(&mpool->reg_list, ompi_free_list_t);
    rc = ompi_free_list_init_new(&mpool->reg_list, mpool->resources.sizeof_reg,
                                 opal_cache_line_size,
                                 OBJ_CLASS(mca_mpool_base_registration_t),
                                 0, opal_cache_line_size, 0, -1, 32, NULL);
    if (OMPI_SUCCESS != rc) {
        /* without a working free list no registration can be created */
        OBJ_DESTRUCT(&mpool->reg_list);
        return rc;
    }

    return OMPI_SUCCESS;
}
|
||||
|
||||
/* udreg callback functions */
|
||||
static void *mca_mpool_udreg_reg_func (void *addr, uint64_t len, void *reg_context)
|
||||
{
|
||||
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) reg_context;
|
||||
mca_mpool_base_registration_t *udreg_reg;
|
||||
ompi_free_list_item_t *item;
|
||||
int rc;
|
||||
|
||||
OMPI_FREE_LIST_GET_MT(&mpool_udreg->reg_list, item);
|
||||
if (NULL == item) {
|
||||
return NULL;
|
||||
}
|
||||
udreg_reg = (mca_mpool_base_registration_t *) item;
|
||||
|
||||
udreg_reg->mpool = reg_context;
|
||||
udreg_reg->base = addr;
|
||||
udreg_reg->bound = (void *)((uintptr_t) addr + len);
|
||||
|
||||
rc = mpool_udreg->resources.register_mem(mpool_udreg->resources.reg_data,
|
||||
addr, len, udreg_reg);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
OMPI_FREE_LIST_RETURN_MT(&mpool_udreg->reg_list, item);
|
||||
mpool_udreg = NULL;
|
||||
}
|
||||
|
||||
return udreg_reg;
|
||||
}
|
||||
|
||||
static uint32_t mca_mpool_udreg_dereg_func (void *device_data, void *dreg_context)
|
||||
{
|
||||
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) dreg_context;
|
||||
mca_mpool_base_registration_t *udreg_reg = (mca_mpool_base_registration_t *) device_data;
|
||||
int rc;
|
||||
|
||||
rc = mpool_udreg->resources.deregister_mem(mpool_udreg->resources.reg_data, udreg_reg);
|
||||
|
||||
if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
|
||||
OMPI_FREE_LIST_RETURN_MT(&mpool_udreg->reg_list,
|
||||
(ompi_free_list_item_t *) udreg_reg);
|
||||
}
|
||||
/* might be worth printing out a warning if an error occurs here */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* huge page allocation */
|
||||
|
||||
/**
 * Allocate a file-backed shared mapping under the module's huge page path.
 *
 * A uniquely named file ("<path>/hugepage.openmpi.<pid>.<cnt>") is created,
 * sized with ftruncate and mapped MAP_SHARED. Once the mapping exists the
 * descriptor is closed and the file unlinked. The tracking object is
 * appended to the huge page table's allocation list.
 *
 * @param mpool      udreg module; mpool->huge_page must be non-NULL
 * @param size       number of bytes to map
 * @param addr       [out] address of the mapping
 * @param base_addr  [out] the tracking object (mca_mpool_udreg_hugepage_alloc_t *)
 *
 * @return 0 on success, -1 on any failure (the outputs are left untouched)
 */
static int mca_mpool_udreg_alloc_huge (mca_mpool_udreg_module_t *mpool, size_t size,
                                       void **addr, void **base_addr) {
    mca_mpool_udreg_hugepage_alloc_t *alloc;
    int rc;

    alloc = OBJ_NEW(mca_mpool_udreg_hugepage_alloc_t);
    if (NULL == alloc) {
        return -1;
    }
    alloc->size = size;

    rc = asprintf (&alloc->path, "%s/hugepage.openmpi.%d.%d", mpool->huge_page->path,
                   getpid (), mpool->huge_page->cnt++);
    if (0 > rc) {
        /* the contents of path are not defined after a failed asprintf */
        alloc->path = NULL;
        OBJ_RELEASE(alloc);
        return -1;
    }

    alloc->fd = open (alloc->path, O_RDWR | O_CREAT, 0600);
    if (-1 == alloc->fd) {
        OBJ_RELEASE(alloc);
        return -1;
    }

    if (0 != ftruncate (alloc->fd, size)) {
        close (alloc->fd);
        unlink (alloc->path);
        OBJ_RELEASE(alloc);
        return -1;
    }

    alloc->ptr = mmap (NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                       alloc->fd, 0);
    if (MAP_FAILED == alloc->ptr) {
        /* mmap reports failure with MAP_FAILED, not NULL. Clear the pointer
         * so nothing later mistakes it for a live mapping, and drop the
         * descriptor and backing file exactly as the ftruncate path does. */
        alloc->ptr = NULL;
        close (alloc->fd);
        unlink (alloc->path);
        OBJ_RELEASE(alloc);
        return -1;
    }

    /* the mapping keeps the pages alive; the file itself is no longer needed */
    close (alloc->fd);
    unlink (alloc->path);

    alloc->huge_table = mpool->huge_page;

    opal_list_append (&mpool->huge_page->allocations, &alloc->super);

    *addr = alloc->ptr;
    *base_addr = alloc;

    return 0;
}
|
||||
|
||||
/**
|
||||
* allocate function
|
||||
*/
|
||||
void* mca_mpool_udreg_alloc(mca_mpool_base_module_t *mpool, size_t size,
|
||||
size_t align, uint32_t flags, mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
mca_mpool_udreg_module_t *udreg_module = (mca_mpool_udreg_module_t *) mpool;
|
||||
void *base_addr, *addr;
|
||||
|
||||
if(0 == align)
|
||||
align = mca_mpool_base_page_size;
|
||||
|
||||
#if OMPI_CUDA_SUPPORT
|
||||
/* CUDA cannot handle registering overlapping regions, so make
|
||||
* sure each region is page sized and page aligned. */
|
||||
align = mca_mpool_base_page_size;
|
||||
size = OPAL_ALIGN(size, mca_mpool_base_page_size, size_t);
|
||||
#endif
|
||||
|
||||
addr = base_addr = NULL;
|
||||
|
||||
if (NULL != udreg_module->huge_page) {
|
||||
size = OPAL_ALIGN(size, udreg_module->huge_page->page_size, size_t);
|
||||
mca_mpool_udreg_alloc_huge (udreg_module, size, &addr, &base_addr);
|
||||
} else {
|
||||
#ifdef HAVE_POSIX_MEMALIGN
|
||||
if((errno = posix_memalign(&base_addr, align, size)) != 0)
|
||||
return NULL;
|
||||
|
||||
addr = base_addr;
|
||||
#else
|
||||
base_addr = malloc(size + align);
|
||||
if(NULL == base_addr)
|
||||
return NULL;
|
||||
|
||||
addr = (void*)OPAL_ALIGN((uintptr_t)base_addr, align, uintptr_t);
|
||||
#endif
|
||||
}
|
||||
|
||||
if(OMPI_SUCCESS != mca_mpool_udreg_register(mpool, addr, size, flags, reg)) {
|
||||
free(base_addr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
(*reg)->alloc_base = (unsigned char *) base_addr;
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
bool mca_mpool_udreg_evict (struct mca_mpool_base_module_t *mpool)
|
||||
{
|
||||
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) mpool;
|
||||
udreg_return_t urc;
|
||||
|
||||
urc = UDREG_Evict (mpool_udreg->udreg_handle);
|
||||
return (UDREG_RC_SUCCESS == urc);
|
||||
}
|
||||
|
||||
/*
|
||||
* register memory
|
||||
*/
|
||||
int mca_mpool_udreg_register(mca_mpool_base_module_t *mpool, void *addr,
|
||||
size_t size, uint32_t flags,
|
||||
mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) mpool;
|
||||
mca_mpool_base_registration_t *udreg_reg;
|
||||
bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS);
|
||||
udreg_entry_t *udreg_entry;
|
||||
udreg_return_t urc;
|
||||
|
||||
if (false == bypass_cache) {
|
||||
/* Get a udreg entry for this region */
|
||||
while (UDREG_RC_ERROR_RESOURCE ==
|
||||
(urc = UDREG_Register (mpool_udreg->udreg_handle, addr, size, &udreg_entry))) {
|
||||
/* try to remove one unused reg and retry */
|
||||
if (!mca_mpool_udreg_evict (mpool)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
udreg_reg = (mca_mpool_base_registration_t *) udreg_entry->device_data;
|
||||
udreg_reg->mpool_context = udreg_entry;
|
||||
} else {
|
||||
/* if cache bypass is requested don't use the udreg cache */
|
||||
while (NULL == (udreg_reg = mca_mpool_udreg_reg_func (addr, size, mpool))) {
|
||||
/* try to remove one unused reg and retry */
|
||||
if (!mca_mpool_udreg_evict (mpool)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
udreg_reg->mpool_context = NULL;
|
||||
}
|
||||
|
||||
if (NULL == udreg_reg) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
udreg_reg->flags = flags;
|
||||
|
||||
*reg = udreg_reg;
|
||||
(*reg)->ref_count++;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* realloc function
|
||||
*/
|
||||
void* mca_mpool_udreg_realloc(mca_mpool_base_module_t *mpool, void *addr,
|
||||
size_t size, mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
mca_mpool_base_registration_t *old_reg = *reg;
|
||||
void *new_mem = mca_mpool_udreg_alloc(mpool, size, 0, old_reg->flags, reg);
|
||||
memcpy(new_mem, addr, old_reg->bound - old_reg->base + 1);
|
||||
mca_mpool_udreg_free(mpool, addr, old_reg);
|
||||
|
||||
return new_mem;
|
||||
}
|
||||
|
||||
/**
 * Drop a huge page allocation: remove its tracking object from the
 * allocation list of the huge page table it belongs to, then release the
 * object.
 *
 * @param alloc  tracking object produced by mca_mpool_udreg_alloc_huge
 */
static void mca_mpool_udreg_free_huge (mca_mpool_udreg_hugepage_alloc_t *alloc)
{
    opal_list_t *owner_list = &alloc->huge_table->allocations;

    opal_list_remove_item (owner_list, &alloc->super);

    OBJ_RELEASE(alloc);
}
|
||||
|
||||
/**
|
||||
* free function
|
||||
*/
|
||||
void mca_mpool_udreg_free(mca_mpool_base_module_t *mpool, void *addr,
|
||||
mca_mpool_base_registration_t *registration)
|
||||
{
|
||||
mca_mpool_udreg_module_t *udreg_module = (mca_mpool_udreg_module_t *) mpool;
|
||||
mca_mpool_udreg_deregister(mpool, registration);
|
||||
|
||||
if (udreg_module->huge_page) {
|
||||
mca_mpool_udreg_free_huge ((mca_mpool_udreg_hugepage_alloc_t *) registration->alloc_base);
|
||||
} else {
|
||||
free (registration->alloc_base);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Registration lookup entry point.
 *
 * This implementation never finds anything: it clears the output pointer
 * and reports OMPI_ERR_NOT_FOUND for every query.
 *
 * @param mpool  mpool module (unused)
 * @param addr   start of the queried region (unused)
 * @param size   length of the queried region (unused)
 * @param reg    [out] always set to NULL
 *
 * @return OMPI_ERR_NOT_FOUND
 */
int mca_mpool_udreg_find(struct mca_mpool_base_module_t *mpool, void *addr,
                         size_t size, mca_mpool_base_registration_t **reg)
{
    (void) mpool;
    (void) addr;
    (void) size;

    *reg = NULL;

    return OMPI_ERR_NOT_FOUND;
}
|
||||
|
||||
int mca_mpool_udreg_deregister(struct mca_mpool_base_module_t *mpool,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) mpool;
|
||||
|
||||
assert(reg->ref_count > 0);
|
||||
|
||||
reg->ref_count--;
|
||||
|
||||
if (0 == reg->ref_count && reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) {
|
||||
mca_mpool_udreg_dereg_func (reg, mpool);
|
||||
} else if (!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
|
||||
UDREG_DecrRefcount (mpool_udreg->udreg_handle, reg->mpool_context);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
void mca_mpool_udreg_finalize(struct mca_mpool_base_module_t *mpool)
|
||||
{
|
||||
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t*)mpool;
|
||||
|
||||
/* Statistic */
|
||||
if (true == mca_mpool_udreg_component.print_stats) {
|
||||
uint64_t hit = 0, miss = 0, evicted = 0;
|
||||
|
||||
(void) UDREG_GetStat (mpool_udreg->udreg_handle,
|
||||
UDREG_STAT_CACHE_HIT, &hit);
|
||||
|
||||
(void) UDREG_GetStat (mpool_udreg->udreg_handle,
|
||||
UDREG_STAT_CACHE_MISS, &miss);
|
||||
|
||||
(void) UDREG_GetStat (mpool_udreg->udreg_handle,
|
||||
UDREG_STAT_CACHE_EVICTED, &evicted);
|
||||
|
||||
opal_output(0, "%s udreg: stats (hit/miss/evicted): %" PRIu64 "/%" PRIu64 "/%" PRIu64 "\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hit, miss, evicted);
|
||||
}
|
||||
|
||||
UDREG_CacheRelease (mpool_udreg->udreg_handle);
|
||||
OBJ_DESTRUCT(&mpool_udreg->reg_list);
|
||||
}
|
||||
|
||||
int mca_mpool_udreg_ft_event(int state) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user