Two new mpools. They are not used now (and by default, not compiled)
but they will be soon. Provide support for GPU buffer transfers within a node. This commit was SVN r26008.
Этот коммит содержится в:
родитель
94549d024b
Коммит
c7a0ce2755
@ -355,7 +355,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
|
||||
CUdeviceptr pbase;
|
||||
size_t psize;
|
||||
|
||||
mca_mpool_rcuda_reg_t *cuda_reg = (mca_mpool_rcuda_reg_t*)newreg;
|
||||
mca_mpool_common_cuda_reg_t *cuda_reg = (mca_mpool_common_cuda_reg_t*)newreg;
|
||||
|
||||
/* We should only be there if this is a CUDA device pointer */
|
||||
result = cuPointerGetAttribute(&memType,
|
||||
@ -416,7 +416,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
|
||||
*/
|
||||
int cuda_ungetmemhandle(void *reg_data, mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
CUDA_DUMP_EVTHANDLE((10, ((mca_mpool_rcuda_reg_t *)reg)->evtHandle, "cuda_ungetmemhandle"));
|
||||
CUDA_DUMP_EVTHANDLE((10, ((mca_mpool_common_cuda_reg_t *)reg)->evtHandle, "cuda_ungetmemhandle"));
|
||||
opal_output_verbose(5, mca_common_cuda_output,
|
||||
"CUDA: cuda_ungetmemhandle: base=%p",
|
||||
reg_data);
|
||||
@ -434,7 +434,7 @@ int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *n
|
||||
{
|
||||
CUresult result;
|
||||
CUipcMemHandle memHandle;
|
||||
mca_mpool_rcuda_reg_t *cuda_newreg = (mca_mpool_rcuda_reg_t*)newreg;
|
||||
mca_mpool_common_cuda_reg_t *cuda_newreg = (mca_mpool_common_cuda_reg_t*)newreg;
|
||||
|
||||
/* Need to copy into memory handle for call into CUDA library. */
|
||||
memcpy(&memHandle, cuda_newreg->memHandle, sizeof(memHandle));
|
||||
@ -473,7 +473,7 @@ int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *n
|
||||
int cuda_closememhandle(void *reg_data, mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
CUresult result;
|
||||
mca_mpool_rcuda_reg_t *cuda_reg = (mca_mpool_rcuda_reg_t*)reg;
|
||||
mca_mpool_common_cuda_reg_t *cuda_reg = (mca_mpool_common_cuda_reg_t*)reg;
|
||||
|
||||
result = cuIpcCloseMemHandle((CUdeviceptr)cuda_reg->base.alloc_base);
|
||||
if (CUDA_SUCCESS != result) {
|
||||
@ -526,7 +526,7 @@ void mca_common_cuda_destruct_event(uint64_t *event)
|
||||
* Put remote event on stream to ensure that the start of the
|
||||
* copy does not start until the completion of the event.
|
||||
*/
|
||||
void mca_common_wait_stream_synchronize(mca_mpool_rcuda_reg_t *rget_reg)
|
||||
void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg)
|
||||
{
|
||||
CUipcEventHandle evtHandle;
|
||||
CUevent event;
|
||||
@ -724,8 +724,8 @@ int progress_one_cuda_event(struct mca_btl_base_descriptor_t **frag) {
|
||||
* Need to make sure the handle we are retrieving from the cache is still
|
||||
* valid. Compare the cached handle to the one received.
|
||||
*/
|
||||
int mca_common_cuda_memhandle_matches(mca_mpool_rcuda_reg_t *new_reg,
|
||||
mca_mpool_rcuda_reg_t *old_reg)
|
||||
int mca_common_cuda_memhandle_matches(mca_mpool_common_cuda_reg_t *new_reg,
|
||||
mca_mpool_common_cuda_reg_t *old_reg)
|
||||
{
|
||||
|
||||
if (0 == memcmp(new_reg->memHandle, old_reg->memHandle, sizeof(new_reg->memHandle))) {
|
||||
|
@ -21,28 +21,30 @@
|
||||
#define OMPI_MCA_COMMON_CUDA_H
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
|
||||
struct mca_mpool_rcuda_reg_t {
|
||||
#define MEMHANDLE_SIZE 8
|
||||
#define EVTHANDLE_SIZE 8
|
||||
struct mca_mpool_common_cuda_reg_t {
|
||||
mca_mpool_base_registration_t base;
|
||||
uint64_t memHandle[8];
|
||||
uint64_t evtHandle[8];
|
||||
uint64_t memHandle[MEMHANDLE_SIZE];
|
||||
uint64_t evtHandle[EVTHANDLE_SIZE];
|
||||
uint64_t event;
|
||||
};
|
||||
typedef struct mca_mpool_rcuda_reg_t mca_mpool_rcuda_reg_t;
|
||||
typedef struct mca_mpool_common_cuda_reg_t mca_mpool_common_cuda_reg_t;
|
||||
|
||||
|
||||
OMPI_DECLSPEC void mca_common_cuda_register(void *ptr, size_t amount, char *msg);
|
||||
|
||||
OMPI_DECLSPEC void mca_common_cuda_unregister(void *ptr, char *msg);
|
||||
|
||||
OMPI_DECLSPEC void mca_common_wait_stream_synchronize(mca_mpool_rcuda_reg_t *rget_reg);
|
||||
OMPI_DECLSPEC void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg);
|
||||
|
||||
OMPI_DECLSPEC int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
|
||||
struct mca_btl_base_descriptor_t *, int *done);
|
||||
|
||||
OMPI_DECLSPEC int progress_one_cuda_event(struct mca_btl_base_descriptor_t **);
|
||||
|
||||
OMPI_DECLSPEC int mca_common_cuda_memhandle_matches(mca_mpool_rcuda_reg_t *new_reg,
|
||||
mca_mpool_rcuda_reg_t *old_reg);
|
||||
OMPI_DECLSPEC int mca_common_cuda_memhandle_matches(mca_mpool_common_cuda_reg_t *new_reg,
|
||||
mca_mpool_common_cuda_reg_t *old_reg);
|
||||
|
||||
OMPI_DECLSPEC void mca_common_cuda_construct_event_and_handle(uint64_t **event, void **handle);
|
||||
OMPI_DECLSPEC void mca_common_cuda_destruct_event(uint64_t *event);
|
||||
|
57
ompi/mca/mpool/gpusm/Makefile.am
Обычный файл
57
ompi/mca/mpool/gpusm/Makefile.am
Обычный файл
@ -0,0 +1,57 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CPPFLAGS = $(mpool_gpusm_CPPFLAGS)
|
||||
|
||||
sources = \
|
||||
mpool_gpusm_module.c \
|
||||
mpool_gpusm_component.c
|
||||
|
||||
if WANT_INSTALL_HEADERS
|
||||
ompidir = $(includedir)/openmpi/$(subdir)
|
||||
ompi_HEADERS = mpool_gpusm.h
|
||||
endif
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_ompi_mpool_gpusm_DSO
|
||||
component_noinst =
|
||||
component_install = mca_mpool_gpusm.la
|
||||
else
|
||||
component_noinst = libmca_mpool_gpusm.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_mpool_gpusm_la_SOURCES = $(sources)
|
||||
mca_mpool_gpusm_la_LDFLAGS = -module -avoid-version
|
||||
mca_mpool_gpusm_la_LIBADD = $(mpool_gpusm_LIBS)
|
||||
if MCA_ompi_cuda_support
|
||||
mca_mpool_gpusm_la_LIBADD += \
|
||||
$(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
|
||||
endif
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_mpool_gpusm_la_SOURCES = $(sources)
|
||||
libmca_mpool_gpusm_la_LDFLAGS = -module -avoid-version
|
||||
libmca_mpool_gpusm_la_LIBADD = $(mpool_gpusm_LIBS)
|
25
ompi/mca/mpool/gpusm/configure.m4
Обычный файл
25
ompi/mca/mpool/gpusm/configure.m4
Обычный файл
@ -0,0 +1,25 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
#
|
||||
# If CUDA support was requested, then build the CUDA memory pools.
|
||||
# This code checks the variable CUDA_SUPPORT_41 which was set earlier in
|
||||
# the configure sequence by the opal_configure_options.m4 code.
|
||||
#
|
||||
|
||||
AC_DEFUN([MCA_ompi_mpool_gpusm_CONFIG],[
|
||||
AC_CONFIG_FILES([ompi/mca/mpool/gpusm/Makefile])
|
||||
|
||||
# Use CUDA_SUPPORT_41 which was filled in by the opal configure code.
|
||||
AS_IF([test "x$CUDA_SUPPORT_41" = "x1"],
|
||||
[$1],
|
||||
[$2])
|
||||
|
||||
])dnl
|
103
ompi/mca/mpool/gpusm/mpool_gpusm.h
Обычный файл
103
ompi/mca/mpool/gpusm/mpool_gpusm.h
Обычный файл
@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef MCA_MPOOL_GPUSM_H
|
||||
#define MCA_MPOOL_GPUSM_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
#define MEMHANDLE_SIZE 8
|
||||
#define EVTHANDLE_SIZE 8
|
||||
struct mca_mpool_gpusm_registration_t {
|
||||
mca_mpool_base_registration_t base;
|
||||
uint64_t memHandle[MEMHANDLE_SIZE];
|
||||
uint64_t evtHandle[EVTHANDLE_SIZE];
|
||||
uint64_t event;
|
||||
};
|
||||
typedef struct mca_mpool_gpusm_registration_t mca_mpool_gpusm_registration_t;
|
||||
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_gpusm_registration_t);
|
||||
|
||||
struct mca_mpool_gpusm_component_t {
|
||||
mca_mpool_base_component_t super;
|
||||
};
|
||||
typedef struct mca_mpool_gpusm_component_t mca_mpool_gpusm_component_t;
|
||||
|
||||
OMPI_DECLSPEC extern mca_mpool_gpusm_component_t mca_mpool_gpusm_component;
|
||||
|
||||
struct mca_mpool_base_resources_t {
|
||||
void *reg_data;
|
||||
size_t sizeof_reg;
|
||||
int (*register_mem)(void *base, size_t size, mca_mpool_base_registration_t *newreg,
|
||||
mca_mpool_base_registration_t *hdrreg);
|
||||
int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg);
|
||||
};
|
||||
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
|
||||
|
||||
struct mca_mpool_gpusm_module_t {
|
||||
mca_mpool_base_module_t super;
|
||||
struct mca_mpool_base_resources_t resources;
|
||||
ompi_free_list_t reg_list;
|
||||
}; typedef struct mca_mpool_gpusm_module_t mca_mpool_gpusm_module_t;
|
||||
|
||||
/*
|
||||
* Initializes the mpool module.
|
||||
*/
|
||||
void mca_mpool_gpusm_module_init(mca_mpool_gpusm_module_t *mpool);
|
||||
|
||||
/**
|
||||
* register block of memory
|
||||
*/
|
||||
int mca_mpool_gpusm_register(mca_mpool_base_module_t* mpool, void *addr,
|
||||
size_t size, uint32_t flags, mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* deregister memory
|
||||
*/
|
||||
int mca_mpool_gpusm_deregister(mca_mpool_base_module_t *mpool,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
|
||||
/**
|
||||
* find registration for a given block of memory
|
||||
*/
|
||||
int mca_mpool_gpusm_find(struct mca_mpool_base_module_t* mpool, void* addr,
|
||||
size_t size, mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* finalize mpool
|
||||
*/
|
||||
void mca_mpool_gpusm_finalize(struct mca_mpool_base_module_t *mpool);
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param state Checkpoint State
|
||||
* @return OMPI_SUCCESS or failure status
|
||||
*/
|
||||
int mca_mpool_gpusm_ft_event(int state);
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
103
ompi/mca/mpool/gpusm/mpool_gpusm_component.c
Обычный файл
103
ompi/mca/mpool/gpusm/mpool_gpusm_component.c
Обычный файл
@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
|
||||
#include "ompi_config.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "mpool_gpusm.h"
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static int gpusm_open(void);
|
||||
static int gpusm_close(void);
|
||||
static int gpusm_register(void);
|
||||
static mca_mpool_base_module_t* gpusm_init(struct mca_mpool_base_resources_t* resources);
|
||||
|
||||
mca_mpool_gpusm_component_t mca_mpool_gpusm_component = {
|
||||
{
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
{
|
||||
MCA_MPOOL_BASE_VERSION_2_0_0,
|
||||
|
||||
"gpusm", /* MCA component name */
|
||||
OMPI_MAJOR_VERSION, /* MCA component major version */
|
||||
OMPI_MINOR_VERSION, /* MCA component minor version */
|
||||
OMPI_RELEASE_VERSION, /* MCA component release version */
|
||||
gpusm_open, /* component open */
|
||||
gpusm_close,
|
||||
NULL,
|
||||
gpusm_register
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
|
||||
gpusm_init
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Component open/close/init/register functions. Most do not do anything,
|
||||
* but keep around for placeholders.
|
||||
*/
|
||||
static int gpusm_open(void)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int gpusm_register(void)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int gpusm_close(void)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static mca_mpool_base_module_t* gpusm_init(struct mca_mpool_base_resources_t *resources)
|
||||
{
|
||||
mca_mpool_gpusm_module_t* mpool_module;
|
||||
|
||||
mpool_module =
|
||||
(mca_mpool_gpusm_module_t*)malloc(sizeof(mca_mpool_gpusm_module_t));
|
||||
|
||||
mpool_module->resources = *resources;
|
||||
|
||||
mca_mpool_gpusm_module_init(mpool_module);
|
||||
|
||||
return &mpool_module->super;
|
||||
}
|
197
ompi/mca/mpool/gpusm/mpool_gpusm_module.c
Обычный файл
197
ompi/mca/mpool/gpusm/mpool_gpusm_module.c
Обычный файл
@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file:
|
||||
*
|
||||
* This file implements a simple memory pool that is used by the GPU
|
||||
* buffer on the sending side. It just gets a memory handle and event
|
||||
* handle that can be sent to the remote side which can then use the
|
||||
* handles to get access to the memory and the event to determine when
|
||||
* it can start accessing the memory. There is no caching of the
|
||||
* memory handles as getting new ones is fast. The event handles are
|
||||
* cached by the cuda_common code.
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/gpusm/mpool_gpusm.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
#include "ompi/mca/common/cuda/common_cuda.h"
|
||||
|
||||
/**
|
||||
* Called when the registration free list is created. An event is created
|
||||
* for each entry.
|
||||
*/
|
||||
static void mca_mpool_gpusm_registration_constructor( mca_mpool_gpusm_registration_t *item )
|
||||
{
|
||||
mca_common_cuda_construct_event_and_handle((uint64_t **)&item->event,
|
||||
(void **)&item->evtHandle);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when the program is exiting. This destroys the events.
|
||||
*/
|
||||
static void mca_mpool_gpusm_registration_destructor( mca_mpool_gpusm_registration_t *item )
|
||||
{
|
||||
mca_common_cuda_destruct_event((uint64_t *)item->event);
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_mpool_gpusm_registration_t, mca_mpool_base_registration_t,
|
||||
mca_mpool_gpusm_registration_constructor,
|
||||
mca_mpool_gpusm_registration_destructor);
|
||||
|
||||
/*
|
||||
* Initializes the mpool module.
|
||||
*/
|
||||
void mca_mpool_gpusm_module_init(mca_mpool_gpusm_module_t* mpool)
|
||||
{
|
||||
mpool->super.mpool_component = &mca_mpool_gpusm_component.super;
|
||||
mpool->super.mpool_base = NULL;
|
||||
mpool->super.mpool_alloc = NULL;
|
||||
mpool->super.mpool_realloc = NULL;
|
||||
mpool->super.mpool_free = NULL;
|
||||
mpool->super.mpool_register = mca_mpool_gpusm_register;
|
||||
mpool->super.mpool_find = mca_mpool_gpusm_find;
|
||||
mpool->super.mpool_deregister = mca_mpool_gpusm_deregister;
|
||||
mpool->super.mpool_release_memory = NULL;
|
||||
mpool->super.mpool_finalize = mca_mpool_gpusm_finalize;
|
||||
mpool->super.mpool_ft_event = mca_mpool_gpusm_ft_event;
|
||||
mpool->super.rcache = NULL;
|
||||
mpool->super.flags = 0;
|
||||
|
||||
mpool->resources.reg_data = NULL;
|
||||
mpool->resources.sizeof_reg = sizeof(struct mca_mpool_common_cuda_reg_t);
|
||||
mpool->resources.register_mem = cuda_getmemhandle;
|
||||
mpool->resources.deregister_mem = cuda_ungetmemhandle;
|
||||
|
||||
OBJ_CONSTRUCT(&mpool->reg_list, ompi_free_list_t);
|
||||
|
||||
/* Start with 0 entries in the free list since CUDA may not have
|
||||
* been initialized when this free list is created and there is
|
||||
* some CUDA specific activities that need to be done. */
|
||||
ompi_free_list_init_new(&mpool->reg_list, mpool->resources.sizeof_reg,
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_mpool_gpusm_registration_t),
|
||||
0,opal_cache_line_size,
|
||||
0, -1, 64, NULL);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Just go ahead and get a new registration. The find and register
|
||||
* functions are the same thing for this memory pool.
|
||||
*/
|
||||
int mca_mpool_gpusm_find(mca_mpool_base_module_t *mpool, void *addr,
|
||||
size_t size,
|
||||
mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
return mca_mpool_gpusm_register(mpool, addr, size, 0, reg);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the one function that does all the work. It will call into
|
||||
* the register function to get the memory handle for the sending
|
||||
* buffer. There is no need to deregister the memory handle so the
|
||||
* deregister function is a no-op.
|
||||
*/
|
||||
int mca_mpool_gpusm_register(mca_mpool_base_module_t *mpool, void *addr,
|
||||
size_t size, uint32_t flags,
|
||||
mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
mca_mpool_gpusm_module_t *mpool_gpusm = (mca_mpool_gpusm_module_t*)mpool;
|
||||
mca_mpool_base_registration_t *gpusm_reg;
|
||||
ompi_free_list_item_t *item;
|
||||
unsigned char *base, *bound;
|
||||
int rc;
|
||||
|
||||
/* In spite of the fact we return an error code, the existing code
|
||||
* checks the registration for a NULL value rather than looking at
|
||||
* the return code. So, initialize the registration to NULL in
|
||||
* case we run into a failure. */
|
||||
*reg = NULL;
|
||||
|
||||
base = addr;
|
||||
bound = (unsigned char *)addr + size - 1;
|
||||
|
||||
OMPI_FREE_LIST_GET(&mpool_gpusm->reg_list, item, rc);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
gpusm_reg = (mca_mpool_base_registration_t*)item;
|
||||
|
||||
gpusm_reg->mpool = mpool;
|
||||
gpusm_reg->base = base;
|
||||
gpusm_reg->bound = bound;
|
||||
gpusm_reg->flags = flags;
|
||||
|
||||
rc = mpool_gpusm->resources.register_mem(base, size, gpusm_reg, NULL);
|
||||
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
OMPI_FREE_LIST_RETURN(&mpool_gpusm->reg_list, item);
|
||||
return rc;
|
||||
}
|
||||
|
||||
*reg = gpusm_reg;
|
||||
(*reg)->ref_count++;
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the registration to the free list.
|
||||
*/
|
||||
int mca_mpool_gpusm_deregister(struct mca_mpool_base_module_t *mpool,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
int rc;
|
||||
mca_mpool_gpusm_module_t *mpool_gpusm = (mca_mpool_gpusm_module_t *)mpool;
|
||||
|
||||
rc = mpool_gpusm->resources.deregister_mem(mpool, reg);
|
||||
OMPI_FREE_LIST_RETURN(&mpool_gpusm->reg_list, (ompi_free_list_item_t*)reg);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Free up the resources.
|
||||
*/
|
||||
void mca_mpool_gpusm_finalize(struct mca_mpool_base_module_t *mpool)
|
||||
{
|
||||
ompi_free_list_item_t *item;
|
||||
mca_mpool_gpusm_module_t *mpool_gpusm = (mca_mpool_gpusm_module_t *)mpool;
|
||||
|
||||
/* Need to run the destructor on each item in the free list explicitly.
|
||||
* The destruction of the free list only runs the destructor on the
|
||||
* main free list, not each item. */
|
||||
while (NULL != (item = (ompi_free_list_item_t *)opal_atomic_lifo_pop(&(mpool_gpusm->reg_list.super)))) {
|
||||
OBJ_DESTRUCT(item);
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&mpool_gpusm->reg_list);
|
||||
return;
|
||||
}
|
||||
|
||||
int mca_mpool_gpusm_ft_event(int state) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
57
ompi/mca/mpool/rgpusm/Makefile.am
Обычный файл
57
ompi/mca/mpool/rgpusm/Makefile.am
Обычный файл
@ -0,0 +1,57 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CPPFLAGS = $(mpool_rgpusm_CPPFLAGS)
|
||||
|
||||
sources = \
|
||||
mpool_rgpusm_module.c \
|
||||
mpool_rgpusm_component.c
|
||||
|
||||
if WANT_INSTALL_HEADERS
|
||||
ompidir = $(includedir)/openmpi/$(subdir)
|
||||
ompi_HEADERS = mpool_rgpusm.h
|
||||
endif
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_ompi_mpool_rgpusm_DSO
|
||||
component_noinst =
|
||||
component_install = mca_mpool_rgpusm.la
|
||||
else
|
||||
component_noinst = libmca_mpool_rgpusm.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_mpool_rgpusm_la_SOURCES = $(sources)
|
||||
mca_mpool_rgpusm_la_LDFLAGS = -module -avoid-version
|
||||
mca_mpool_rgpusm_la_LIBADD = $(mpool_rgpusm_LIBS)
|
||||
if MCA_ompi_cuda_support
|
||||
mca_mpool_rgpusm_la_LIBADD += \
|
||||
$(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la
|
||||
endif
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_mpool_rgpusm_la_SOURCES = $(sources)
|
||||
libmca_mpool_rgpusm_la_LDFLAGS = -module -avoid-version
|
||||
libmca_mpool_rgpusm_la_LIBADD = $(mpool_rgpusm_LIBS)
|
25
ompi/mca/mpool/rgpusm/configure.m4
Обычный файл
25
ompi/mca/mpool/rgpusm/configure.m4
Обычный файл
@ -0,0 +1,25 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
#
|
||||
# If CUDA support was requested, then build the CUDA memory pools.
|
||||
# This code checks the variable CUDA_SUPPORT_41 which was set earlier in
|
||||
# the configure sequence by the opal_configure_options.m4 code.
|
||||
#
|
||||
|
||||
AC_DEFUN([MCA_ompi_mpool_rgpusm_CONFIG],[
|
||||
AC_CONFIG_FILES([ompi/mca/mpool/rgpusm/Makefile])
|
||||
|
||||
# Use CUDA_SUPPORT_41 which was filled in by the opal configure code.
|
||||
AS_IF([test "x$CUDA_SUPPORT_41" = "x1"],
|
||||
[$1],
|
||||
[$2])
|
||||
|
||||
])dnl
|
117
ompi/mca/mpool/rgpusm/mpool_rgpusm.h
Обычный файл
117
ompi/mca/mpool/rgpusm/mpool_rgpusm.h
Обычный файл
@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef MCA_MPOOL_RGPUSM_H
|
||||
#define MCA_MPOOL_RGPUSM_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct mca_mpool_rgpusm_component_t {
|
||||
mca_mpool_base_component_t super;
|
||||
char* rcache_name;
|
||||
size_t rcache_size_limit;
|
||||
bool print_stats;
|
||||
uint32_t leave_pinned;
|
||||
int output;
|
||||
};
|
||||
typedef struct mca_mpool_rgpusm_component_t mca_mpool_rgpusm_component_t;
|
||||
|
||||
OMPI_DECLSPEC extern mca_mpool_rgpusm_component_t mca_mpool_rgpusm_component;
|
||||
|
||||
struct mca_mpool_base_resources_t {
|
||||
void *reg_data;
|
||||
size_t sizeof_reg;
|
||||
int (*register_mem)(void *base, size_t size, mca_mpool_base_registration_t *newreg,
|
||||
mca_mpool_base_registration_t *hdrreg);
|
||||
int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg);
|
||||
};
|
||||
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
|
||||
|
||||
struct mca_mpool_rgpusm_module_t {
|
||||
mca_mpool_base_module_t super;
|
||||
struct mca_mpool_base_resources_t resources;
|
||||
ompi_free_list_t reg_list;
|
||||
opal_list_t lru_list;
|
||||
uint32_t stat_cache_hit;
|
||||
uint32_t stat_cache_valid;
|
||||
uint32_t stat_cache_invalid;
|
||||
uint32_t stat_cache_miss;
|
||||
uint32_t stat_evicted;
|
||||
uint32_t stat_cache_found;
|
||||
uint32_t stat_cache_notfound;
|
||||
}; typedef struct mca_mpool_rgpusm_module_t mca_mpool_rgpusm_module_t;
|
||||
|
||||
/*
|
||||
* Initializes the mpool module.
|
||||
*/
|
||||
void mca_mpool_rgpusm_module_init(mca_mpool_rgpusm_module_t *mpool);
|
||||
|
||||
/**
|
||||
* register block of memory
|
||||
*/
|
||||
int mca_mpool_rgpusm_register(mca_mpool_base_module_t* mpool, void *addr,
|
||||
size_t size, uint32_t flags, mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* deregister memory
|
||||
*/
|
||||
int mca_mpool_rgpusm_deregister(mca_mpool_base_module_t *mpool,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
|
||||
/**
|
||||
* free memory allocated by alloc function
|
||||
*/
|
||||
void mca_mpool_rgpusm_free(mca_mpool_base_module_t *mpool, void * addr,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
|
||||
/**
|
||||
* find registration for a given block of memory
|
||||
*/
|
||||
int mca_mpool_rgpusm_find(struct mca_mpool_base_module_t* mpool, void* addr,
|
||||
size_t size, mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* unregister all registration covering the block of memory
|
||||
*/
|
||||
int mca_mpool_rgpusm_release_memory(mca_mpool_base_module_t* mpool, void *base,
|
||||
size_t size);
|
||||
|
||||
/**
|
||||
* finalize mpool
|
||||
*/
|
||||
void mca_mpool_rgpusm_finalize(struct mca_mpool_base_module_t *mpool);
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param state Checkpoint State
|
||||
* @return OMPI_SUCCESS or failure status
|
||||
*/
|
||||
int mca_mpool_rgpusm_ft_event(int state);
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
142
ompi/mca/mpool/rgpusm/mpool_rgpusm_component.c
Обычный файл
142
ompi/mca/mpool/rgpusm/mpool_rgpusm_component.c
Обычный файл
@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
|
||||
#include "ompi_config.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "mpool_rgpusm.h"
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static int rgpusm_open(void);
|
||||
static int rgpusm_close(void);
|
||||
static int rgpusm_register(void);
|
||||
static mca_mpool_base_module_t* rgpusm_init(struct mca_mpool_base_resources_t* resources);
|
||||
|
||||
mca_mpool_rgpusm_component_t mca_mpool_rgpusm_component = {
|
||||
{
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
{
|
||||
MCA_MPOOL_BASE_VERSION_2_0_0,
|
||||
|
||||
"rgpusm", /* MCA component name */
|
||||
OMPI_MAJOR_VERSION, /* MCA component major version */
|
||||
OMPI_MINOR_VERSION, /* MCA component minor version */
|
||||
OMPI_RELEASE_VERSION, /* MCA component release version */
|
||||
rgpusm_open, /* component open */
|
||||
rgpusm_close,
|
||||
NULL,
|
||||
rgpusm_register
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
|
||||
rgpusm_init
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* component open/close/init function
|
||||
*/
|
||||
static int rgpusm_open(void)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int rgpusm_register(void)
|
||||
{
|
||||
int val;
|
||||
|
||||
mca_base_param_reg_string(&mca_mpool_rgpusm_component.super.mpool_version,
|
||||
"rcache_name",
|
||||
"The name of the registration cache the mpool should use",
|
||||
false, false, "vma", &mca_mpool_rgpusm_component.rcache_name);
|
||||
|
||||
mca_base_param_reg_int(&mca_mpool_rgpusm_component.super.mpool_version,
|
||||
"rcache_size_limit",
|
||||
"the maximum size of registration cache in bytes. "
|
||||
"0 is unlimited (default 0)", false, false, 0, &val);
|
||||
|
||||
mca_mpool_rgpusm_component.rcache_size_limit = (size_t)val;
|
||||
|
||||
mca_base_param_reg_int(&mca_mpool_rgpusm_component.super.mpool_version,
|
||||
"leave_pinned",
|
||||
"Whether to keep memory handles around or release them when done. ",
|
||||
false, false, 1, &val);
|
||||
mca_mpool_rgpusm_component.leave_pinned = (size_t)val;
|
||||
|
||||
mca_base_param_reg_int(&mca_mpool_rgpusm_component.super.mpool_version,
|
||||
"print_stats",
|
||||
"print pool usage statistics at the end of the run",
|
||||
false, false, 0, &val);
|
||||
|
||||
mca_mpool_rgpusm_component.print_stats = val?true:false;
|
||||
|
||||
/* Set different levels of verbosity in the rgpusm related code. */
|
||||
mca_base_param_reg_int(&mca_mpool_rgpusm_component.super.mpool_version,
|
||||
"verbose",
|
||||
"Set level of mpool rgpusm verbosity",
|
||||
false, false, 0, &val);
|
||||
mca_mpool_rgpusm_component.output = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(mca_mpool_rgpusm_component.output, val);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int rgpusm_close(void)
|
||||
{
|
||||
if (NULL != mca_mpool_rgpusm_component.rcache_name) {
|
||||
free(mca_mpool_rgpusm_component.rcache_name);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static mca_mpool_base_module_t* rgpusm_init(
|
||||
struct mca_mpool_base_resources_t *resources)
|
||||
{
|
||||
mca_mpool_rgpusm_module_t* mpool_module;
|
||||
|
||||
mpool_module =
|
||||
(mca_mpool_rgpusm_module_t*)malloc(sizeof(mca_mpool_rgpusm_module_t));
|
||||
|
||||
mpool_module->resources = *resources;
|
||||
|
||||
mca_mpool_rgpusm_module_init(mpool_module);
|
||||
|
||||
return &mpool_module->super;
|
||||
}
|
590
ompi/mca/mpool/rgpusm/mpool_rgpusm_module.c
Обычный файл
590
ompi/mca/mpool/rgpusm/mpool_rgpusm_module.c
Обычный файл
@ -0,0 +1,590 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file:
|
||||
*
|
||||
* This memory pool is used for getting the memory handle of remote
|
||||
* GPU memory when using CUDA. Hence, the name is "rgpusm" for "remote
|
||||
* CUDA" GPU memory. There is a cache that can be used to store the
|
||||
* remote handles in case they are reused to save on the registration
|
||||
* cost as that can be expensive, on the order of 100 usecs. The
|
||||
* cache can also be used just to track how many handles are in use at
|
||||
* a time. It is best to look at this with the four different
|
||||
* scenarios that are possible.
|
||||
* 1. mpool_rgpusm_leave_pinned=0, cache_size=unlimited
|
||||
* 2. mpool_rgpusm_leave_pinned=0, cache_size=limited
|
||||
* 3. mpool_rgpusm_leave_pinned=1, cache_size=unlimited (default)
|
||||
* 4. mpool_rgpusm_leave_pinned=1, cache_size=limited.
|
||||
*
|
||||
* Case 1: The cache is unused and remote memory is registered and
|
||||
* unregistered for each transaction. The amount of outstanding
|
||||
* registered memory is unlimited.
|
||||
* Case 2: The cache keeps track of how much memory is registered at a
|
||||
* time. Since leave pinned is 0, any memory that is registered is in
|
||||
* use. If the amount to register exceeds the amount, we will error
|
||||
* out. This could be handled more gracefully, but this is not a
|
||||
* common way to run, so we will leave as is.
|
||||
* Case 3: The cache is needed to track current and past transactions.
|
||||
* However, there is no limit on the number that can be stored.
|
||||
* Therefore, once memory enters the cache, and gets registered, it
|
||||
* stays that way forever.
|
||||
* Case 4: The cache is needed to track current and past transactions.
|
||||
* In addition, a list of most recently used (but no longer in use)
|
||||
* registrations is stored so that it can be used to evict
|
||||
* registrations from the cache. In addition, these registrations are
|
||||
* deregistered.
|
||||
*
|
||||
* I also want to capture how we can run into the case where we do not
|
||||
* find something in the cache, but when we try to register it, we get
|
||||
* an error back from the CUDA library saying the memory is in use.
|
||||
* This can happen in the following scenario. The application mallocs
|
||||
* a buffer of size 32K. The library loads this in the cache and
|
||||
* registers it. The application then frees the buffer. It then
|
||||
* mallocs a buffer of size 64K. This malloc returns the same base
|
||||
* address as the first 32K allocation. The library searches the
|
||||
* cache, but since the size is larger than the original allocation it
|
||||
* does not find the registration. It then attempts to register this.
|
||||
* The CUDA library returns an error saying it is already mapped. To
|
||||
* handle this, we return an error of OMPI_ERR_WOULD_BLOCK to the
|
||||
* memory pool. The memory pool then looks for the registration based
|
||||
* on the base address and a size of 4. We use the small size to make
|
||||
* sure that we find the registration. This registration is evicted,
|
||||
* and we try to register again.
|
||||
*/
|
||||
|
||||
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
|
||||
#include "ompi_config.h"
|
||||
#include "opal/align.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "ompi/mca/mpool/rgpusm/mpool_rgpusm.h"
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#ifdef HAVE_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
#include "ompi/mca/rcache/rcache.h"
|
||||
#include "ompi/mca/rcache/base/base.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
#include "ompi/mca/common/cuda/common_cuda.h"
|
||||
|
||||
|
||||
/* A hack so that page alignment is disabled in my instantiation of
 * the rcache.  This needs to be fixed.
 *
 * SET_PAGE_ALIGNMENT_TO_ZERO() remembers the current value of
 * mca_mpool_base_page_size_log in saved_page_size and then sets it to
 * zero; RESTORE_PAGE_ALIGNMENT() puts the remembered value back.  The
 * two must be used as a pair around the rcache call.
 *
 * Both macros are wrapped in do { } while (0) so that each expands to
 * exactly one statement and remains correct when used as the body of
 * an unbraced if/else.  Call them with a trailing semicolon. */
static size_t saved_page_size;
#define SET_PAGE_ALIGNMENT_TO_ZERO()                        \
    do {                                                    \
        saved_page_size = mca_mpool_base_page_size_log;     \
        mca_mpool_base_page_size_log = 0;                   \
    } while (0)

#define RESTORE_PAGE_ALIGNMENT()                            \
    do {                                                    \
        mca_mpool_base_page_size_log = saved_page_size;     \
    } while (0)
|
||||
|
||||
/*
 * Evicts one registration: the first entry of the LRU list is taken
 * off the list, deleted from the rcache and deregistered.
 *
 * The rcache lock is released around the deregistration call and taken
 * again afterwards, so this is meant to be entered with that lock held.
 *
 * @param mpool  the rgpusm module
 * @return true when a registration was evicted and recycled; false
 *         when the LRU list is empty or the deregistration failed
 */
static inline bool mca_mpool_rgpusm_deregister_lru (mca_mpool_base_module_t *mpool) {
    mca_mpool_rgpusm_module_t *rgpusm = (mca_mpool_rgpusm_module_t *) mpool;
    mca_mpool_base_registration_t *victim;
    int status;

    /* Take the candidate off the LRU list; nothing to do if it is empty. */
    victim = (mca_mpool_base_registration_t*)
        opal_list_remove_first (&rgpusm->lru_list);
    if (NULL == victim) {
        return false;
    }

    /* It must leave the cache before its memory is deregistered. */
    mpool->rcache->rcache_delete(mpool->rcache, victim);

    /* The deregistration itself runs without the rcache lock. */
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
    assert(victim->ref_count == 0);
    status = rgpusm->resources.deregister_mem(rgpusm->resources.reg_data,
                                              victim);
    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    if (OMPI_SUCCESS == status) {
        OMPI_FREE_LIST_RETURN(&rgpusm->reg_list,
                              (ompi_free_list_item_t*)victim);
        rgpusm->stat_evicted++;
        return true;
    }

    /* The registration is no longer referenced from the list or the
     * cache at this point, so a failed deregistration potentially
     * leaks it. */
    return false;
}
|
||||
|
||||
|
||||
/*
|
||||
* Initializes the mpool module.
|
||||
*/
|
||||
void mca_mpool_rgpusm_module_init(mca_mpool_rgpusm_module_t* mpool)
|
||||
{
|
||||
mpool->super.mpool_component = &mca_mpool_rgpusm_component.super;
|
||||
mpool->super.mpool_base = NULL; /* no base .. */
|
||||
mpool->super.mpool_alloc = NULL;
|
||||
mpool->super.mpool_realloc = NULL;
|
||||
mpool->super.mpool_free = mca_mpool_rgpusm_free;
|
||||
mpool->super.mpool_register = mca_mpool_rgpusm_register;
|
||||
mpool->super.mpool_find = mca_mpool_rgpusm_find;
|
||||
mpool->super.mpool_deregister = mca_mpool_rgpusm_deregister;
|
||||
mpool->super.mpool_release_memory = NULL;
|
||||
mpool->super.mpool_finalize = mca_mpool_rgpusm_finalize;
|
||||
mpool->super.mpool_ft_event = mca_mpool_rgpusm_ft_event;
|
||||
mpool->super.rcache =
|
||||
mca_rcache_base_module_create(mca_mpool_rgpusm_component.rcache_name);
|
||||
mpool->super.flags = 0;
|
||||
|
||||
mpool->resources.reg_data = NULL;
|
||||
mpool->resources.sizeof_reg = sizeof(struct mca_mpool_common_cuda_reg_t);
|
||||
mpool->resources.register_mem = cuda_openmemhandle;
|
||||
mpool->resources.deregister_mem = cuda_closememhandle;
|
||||
|
||||
OBJ_CONSTRUCT(&mpool->reg_list, ompi_free_list_t);
|
||||
ompi_free_list_init_new(&mpool->reg_list, mpool->resources.sizeof_reg,
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_mpool_base_registration_t),
|
||||
0,opal_cache_line_size,
|
||||
0, -1, 32, NULL);
|
||||
OBJ_CONSTRUCT(&mpool->lru_list, opal_list_t);
|
||||
mpool->stat_cache_hit = mpool->stat_cache_miss = mpool->stat_evicted = 0;
|
||||
mpool->stat_cache_found = mpool->stat_cache_notfound = 0;
|
||||
mpool->stat_cache_valid = mpool->stat_cache_invalid = 0;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* This function opens and handle using the handle that was received
|
||||
* from the remote memory. It uses the addr and size of the remote
|
||||
* memory for caching the registration.
|
||||
*/
|
||||
int mca_mpool_rgpusm_register(mca_mpool_base_module_t *mpool, void *addr,
|
||||
size_t size, uint32_t flags,
|
||||
mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool;
|
||||
mca_mpool_common_cuda_reg_t *rgpusm_reg;
|
||||
mca_mpool_common_cuda_reg_t *rget_reg;
|
||||
ompi_free_list_item_t *item;
|
||||
int rc;
|
||||
int mypeer; /* just for debugging */
|
||||
|
||||
/* In order to preserve the signature of the mca_mpool_rgpusm_register
|
||||
* function, we are using the **reg variable to not only get back the
|
||||
* registration information, but to hand in the memory handle received
|
||||
* from the remote side. */
|
||||
rget_reg = (mca_mpool_common_cuda_reg_t *)*reg;
|
||||
|
||||
mypeer = flags;
|
||||
flags = 0;
|
||||
/* No need to support MCA_MPOOL_FLAGS_CACHE_BYPASS in here. It is not used. */
|
||||
assert(0 == (flags & MCA_MPOOL_FLAGS_CACHE_BYPASS));
|
||||
|
||||
/* This chunk of code handles the case where leave pinned is not
|
||||
* set and we do not use the cache. This is not typically how we
|
||||
* will be running. This means that one can have an unlimited
|
||||
* number of registrations occuring at the same time. Since we
|
||||
* are not leaving the registrations pinned, the number of
|
||||
* registrations is unlimited and there is no need for a cache. */
|
||||
if(!mca_mpool_rgpusm_component.leave_pinned && 0 == mca_mpool_rgpusm_component.rcache_size_limit) {
|
||||
OMPI_FREE_LIST_GET(&mpool_rgpusm->reg_list, item, rc);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
rgpusm_reg = (mca_mpool_common_cuda_reg_t*)item;
|
||||
rgpusm_reg->base.mpool = mpool;
|
||||
rgpusm_reg->base.base = addr;
|
||||
rgpusm_reg->base.bound = (unsigned char *)addr + size - 1;;
|
||||
rgpusm_reg->base.flags = flags;
|
||||
|
||||
/* Copy the memory handle received into the registration */
|
||||
memcpy(rgpusm_reg->memHandle, rget_reg->memHandle, sizeof(rget_reg->memHandle));
|
||||
|
||||
/* The rget_reg registration is holding the memory handle needed
|
||||
* to register the remote memory. This was received from the remote
|
||||
* process. A pointer to the memory is returned in the alloc_base field. */
|
||||
rc = mpool_rgpusm->resources.register_mem(addr, size,
|
||||
(mca_mpool_base_registration_t *)rgpusm_reg,
|
||||
(mca_mpool_base_registration_t *)rget_reg);
|
||||
|
||||
/* This error should not happen with no cache in use. */
|
||||
assert(OMPI_ERR_WOULD_BLOCK != rc);
|
||||
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
OMPI_FREE_LIST_RETURN(&mpool_rgpusm->reg_list, item);
|
||||
return rc;
|
||||
}
|
||||
rgpusm_reg->base.ref_count++;
|
||||
*reg = (mca_mpool_base_registration_t *)rgpusm_reg;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Check to see if memory is registered and stored in the cache. */
|
||||
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
||||
SET_PAGE_ALIGNMENT_TO_ZERO();
|
||||
mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
|
||||
RESTORE_PAGE_ALIGNMENT();
|
||||
|
||||
/* If *reg is not NULL, we have a registration. Let us see if the
|
||||
* memory handle matches the one we were looking for. If not, the
|
||||
* registration is invalid and needs to be removed. This happens
|
||||
* if memory was allocated, freed, and allocated again and ends up
|
||||
* with the same virtual address and within the limits of the
|
||||
* previous registration. The memory handle check will catch that
|
||||
* scenario as the handles have unique serial numbers. */
|
||||
if (*reg != NULL) {
|
||||
mpool_rgpusm->stat_cache_hit++;
|
||||
opal_output_verbose(10, mca_mpool_rgpusm_component.output,
|
||||
"Found addr=%p, size=%d (base=%p,size=%d)in cache",
|
||||
addr, (int)size, (*reg)->base,
|
||||
(int)((*reg)->bound - (*reg)->base));
|
||||
|
||||
if (mca_common_cuda_memhandle_matches((mca_mpool_common_cuda_reg_t *)*reg, rget_reg)) {
|
||||
/* Registration matches what was requested. All is good. */
|
||||
mpool_rgpusm->stat_cache_valid++;
|
||||
} else {
|
||||
/* This is an old registration. Need to boot it. */
|
||||
opal_output_verbose(10, mca_mpool_rgpusm_component.output,
|
||||
"Mismatched Handle: Evicting addr=%p, size=%d in cache",
|
||||
addr, (int)size);
|
||||
/* The ref_count has to be zero as this memory cannot possibly
|
||||
* be in use. Assert on that just to make sure. */
|
||||
assert(0 == (*reg)->ref_count);
|
||||
if (mca_mpool_rgpusm_component.leave_pinned) {
|
||||
opal_list_remove_item(&mpool_rgpusm->lru_list,
|
||||
(opal_list_item_t*)(*reg));
|
||||
}
|
||||
|
||||
/* Bump the reference count to keep things copacetic in deregister */
|
||||
(*reg)->ref_count++;
|
||||
/* Invalidate the registration so it will get booted out. */
|
||||
(*reg)->flags |= MCA_MPOOL_FLAGS_INVALID;
|
||||
mca_mpool_rgpusm_deregister(mpool, *reg);
|
||||
*reg = NULL;
|
||||
mpool_rgpusm->stat_cache_invalid++;
|
||||
}
|
||||
} else {
|
||||
/* Nothing was found in the cache. */
|
||||
mpool_rgpusm->stat_cache_miss++;
|
||||
}
|
||||
|
||||
/* If we have a registration here, then we know it is valid. */
|
||||
if (*reg != NULL) {
|
||||
opal_output_verbose(10, mca_mpool_rgpusm_component.output,
|
||||
"CACHE HIT is good: ep=%d, addr=%p, size=%d in cache",
|
||||
mypeer, addr, (int)size);
|
||||
|
||||
/* When using leave pinned, we keep an LRU list. */
|
||||
if ((0 == (*reg)->ref_count) && mca_mpool_rgpusm_component.leave_pinned) {
|
||||
opal_output_verbose(20, mca_mpool_rgpusm_component.output,
|
||||
"POP OFF LRU: ep=%d, addr=%p, size=%d in cache",
|
||||
mypeer, addr, (int)size);
|
||||
opal_list_remove_item(&mpool_rgpusm->lru_list,
|
||||
(opal_list_item_t*)(*reg));
|
||||
}
|
||||
(*reg)->ref_count++;
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
opal_output(-1, "reg->ref_count=%d", (int)(*reg)->ref_count);
|
||||
opal_output_verbose(80, mca_mpool_rgpusm_component.output,
|
||||
"Found entry in cache addr=%p, size=%d", addr, (int)size);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* If we are here, then we did not find a registration, or it was invalid,
|
||||
* so this is a new one, and we are going to use the cache. */
|
||||
assert(NULL == *reg);
|
||||
opal_output_verbose(10, mca_mpool_rgpusm_component.output,
|
||||
"New registration ep=%d, addr=%p, size=%d in cache",
|
||||
mypeer, addr, (int)size);
|
||||
|
||||
OMPI_FREE_LIST_GET(&mpool_rgpusm->reg_list, item, rc);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
return rc;
|
||||
}
|
||||
rgpusm_reg = (mca_mpool_common_cuda_reg_t*)item;
|
||||
|
||||
rgpusm_reg->base.mpool = mpool;
|
||||
rgpusm_reg->base.base = addr;
|
||||
rgpusm_reg->base.bound = (unsigned char *)addr + size - 1;
|
||||
rgpusm_reg->base.flags = flags;
|
||||
|
||||
/* Need the memory handle saved in the registration */
|
||||
memcpy(rgpusm_reg->memHandle, rget_reg->memHandle, sizeof(rget_reg->memHandle));
|
||||
|
||||
/* Actually register the memory, which opens the memory handle.
|
||||
* Need to do this prior to putting in the cache as the base and
|
||||
* bound values may be changed by the registration. The memory
|
||||
* associated with the handle comes back in the alloc_base
|
||||
* value. */
|
||||
rc = mpool_rgpusm->resources.register_mem(addr, size, (mca_mpool_base_registration_t *)rgpusm_reg,
|
||||
(mca_mpool_base_registration_t *)rget_reg);
|
||||
/* There is a chance we can get the OMPI_ERR_WOULD_BLOCK from the
|
||||
* CUDA codes attempt to register the memory. The case that this
|
||||
* can happen is as follows. A block of memory is registered.
|
||||
* Then the sending side frees the memory. The sending side then
|
||||
* cuMemAllocs memory again and gets the same base
|
||||
* address. However, it cuMemAllocs a block that is larger than
|
||||
* the one in the cache. The cache will return that memory is not
|
||||
* registered and call into CUDA to register it. However, that
|
||||
* will fail with CUDA_ERROR_ALREADY_MAPPED. Therefore we need to
|
||||
* boot that previous allocation out and deregister it first.
|
||||
*/
|
||||
if (OMPI_ERR_WOULD_BLOCK == rc) {
|
||||
mca_mpool_base_registration_t *oldreg;
|
||||
|
||||
SET_PAGE_ALIGNMENT_TO_ZERO();
|
||||
/* Need to make sure it is at least 4 bytes in size This will
|
||||
* ensure we get the hit in the cache. */
|
||||
mpool->rcache->rcache_find(mpool->rcache, addr, 4, &oldreg);
|
||||
RESTORE_PAGE_ALIGNMENT();
|
||||
/* The ref_count has to be zero as this memory cannot possibly
|
||||
* be in use. Assert on that just to make sure. */
|
||||
assert(0 == oldreg->ref_count);
|
||||
if (mca_mpool_rgpusm_component.leave_pinned) {
|
||||
opal_list_remove_item(&mpool_rgpusm->lru_list,
|
||||
(opal_list_item_t*)oldreg);
|
||||
}
|
||||
|
||||
/* Bump the reference count to keep things copacetic in deregister */
|
||||
oldreg->ref_count++;
|
||||
/* Invalidate the registration so it will get booted out. */
|
||||
oldreg->flags |= MCA_MPOOL_FLAGS_INVALID;
|
||||
mca_mpool_rgpusm_deregister(mpool, oldreg);
|
||||
mpool_rgpusm->stat_evicted++;
|
||||
|
||||
/* And try again. This only needs to be attempted one other time. */
|
||||
rc = mpool_rgpusm->resources.register_mem(addr, size, (mca_mpool_base_registration_t *)rgpusm_reg,
|
||||
(mca_mpool_base_registration_t *)rget_reg);
|
||||
}
|
||||
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
OMPI_FREE_LIST_RETURN(&mpool_rgpusm->reg_list, item);
|
||||
return rc;
|
||||
}
|
||||
|
||||
opal_output_verbose(80, mca_mpool_rgpusm_component.output,
|
||||
"About to insert in rgpusm cache addr=%p, size=%d", addr, (int)size);
|
||||
SET_PAGE_ALIGNMENT_TO_ZERO();
|
||||
while((rc = mpool->rcache->rcache_insert(mpool->rcache, (mca_mpool_base_registration_t *)rgpusm_reg,
|
||||
mca_mpool_rgpusm_component.rcache_size_limit)) ==
|
||||
OMPI_ERR_TEMP_OUT_OF_RESOURCE) {
|
||||
opal_output(-1, "No room in the cache - boot one out");
|
||||
if (!mca_mpool_rgpusm_deregister_lru(mpool)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
RESTORE_PAGE_ALIGNMENT();
|
||||
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
OMPI_FREE_LIST_RETURN(&mpool_rgpusm->reg_list, item);
|
||||
/* We cannot recover from this. We can be here if the size of the cache
|
||||
* is smaller than the amount of memory we are trying to register in a single
|
||||
* transfer. In that case, rc is MPI_ERR_OUT_OF_RESOURCES, but everything is
|
||||
* stuck at that point. Therefore, just error out completely.
|
||||
*/
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
rgpusm_reg->base.ref_count++;
|
||||
*reg = (mca_mpool_base_registration_t *)rgpusm_reg;
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
|
||||
/* Cleanup any vmas that we have deferred deletion on */
|
||||
mpool->rcache->rcache_clean(mpool->rcache);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* free function
|
||||
*/
|
||||
void mca_mpool_rgpusm_free(mca_mpool_base_module_t *mpool, void *addr,
|
||||
mca_mpool_base_registration_t *registration)
|
||||
{
|
||||
void *alloc_base = registration->alloc_base;
|
||||
mca_mpool_rgpusm_deregister(mpool, registration);
|
||||
free(alloc_base);
|
||||
}
|
||||
|
||||
/*
 * Looks up a registration covering [addr, addr + size) in the
 * registration cache.
 *
 * When a registration is found and leave_pinned is enabled, it is
 * taken off the LRU list if it was idle (ref_count of 0) and its
 * reference count is incremented.  Otherwise the lookup is counted as
 * "not found" in the statistics.
 *
 * NOTE(review): with leave_pinned disabled a registration found by the
 * rcache is still handed back through *reg, but without a reference
 * being taken -- confirm that callers expect this.
 *
 * @param mpool  the rgpusm module
 * @param addr   start address to look for
 * @param size   number of bytes to look for
 * @param reg    out: the registration found by the rcache, if any
 * @return the result of the rcache lookup
 */
int mca_mpool_rgpusm_find(struct mca_mpool_base_module_t *mpool, void *addr,
                          size_t size, mca_mpool_base_registration_t **reg)
{
    mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool;
    int rc;

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    opal_output(-1, "Looking for addr=%p, size=%d", addr, (int)size);
    SET_PAGE_ALIGNMENT_TO_ZERO();
    rc = mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
    RESTORE_PAGE_ALIGNMENT();
    if(*reg != NULL && mca_mpool_rgpusm_component.leave_pinned) {
        /* An idle registration sits on the LRU list; it is in use
         * again now, so take it off. */
        if(0 == (*reg)->ref_count) {
            opal_list_remove_item(&mpool_rgpusm->lru_list, (opal_list_item_t*)(*reg));
        }
        mpool_rgpusm->stat_cache_found++;
        (*reg)->ref_count++;
    } else {
        mpool_rgpusm->stat_cache_notfound++;
    }
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    return rc;
}
|
||||
|
||||
/* Tells whether a registration may be kept in the cache: it may not
 * when it is flagged to bypass the cache or flagged as invalid. */
static inline bool registration_is_cachebale(mca_mpool_base_registration_t *reg)
{
    if (reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) {
        return false;
    }
    if (reg->flags & MCA_MPOOL_FLAGS_INVALID) {
        return false;
    }
    return true;
}
|
||||
|
||||
int mca_mpool_rgpusm_deregister(struct mca_mpool_base_module_t *mpool,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool;
|
||||
int rc = OMPI_SUCCESS;
|
||||
assert(reg->ref_count > 0);
|
||||
|
||||
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
||||
reg->ref_count--;
|
||||
opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
|
||||
if(reg->ref_count > 0) {
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
if(mca_mpool_rgpusm_component.leave_pinned && registration_is_cachebale(reg))
|
||||
{
|
||||
/* if leave_pinned is set don't deregister memory, but put it
|
||||
* on LRU list for future use */
|
||||
opal_list_prepend(&mpool_rgpusm->lru_list, (opal_list_item_t*)reg);
|
||||
} else {
|
||||
/* Remove from rcache first */
|
||||
if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
|
||||
mpool->rcache->rcache_delete(mpool->rcache, reg);
|
||||
|
||||
/* Drop the rcache lock before deregistring the memory */
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
|
||||
{
|
||||
mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t *)mpool;
|
||||
|
||||
assert(reg->ref_count == 0);
|
||||
rc = mpool_rgpusm->resources.deregister_mem(mpool_rgpusm->resources.reg_data,
|
||||
reg);
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
||||
|
||||
if(OMPI_SUCCESS == rc) {
|
||||
OMPI_FREE_LIST_RETURN(&mpool_rgpusm->reg_list,
|
||||
(ompi_free_list_item_t*)reg);
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
|
||||
/* Cleanup any vmas that we have deferred deletion on */
|
||||
mpool->rcache->rcache_clean(mpool->rcache);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
#define RGPUSM_MPOOL_NREGS 100
|
||||
|
||||
void mca_mpool_rgpusm_finalize(struct mca_mpool_base_module_t *mpool)
|
||||
{
|
||||
mca_mpool_rgpusm_module_t *mpool_rgpusm = (mca_mpool_rgpusm_module_t*)mpool;
|
||||
mca_mpool_base_registration_t *reg;
|
||||
mca_mpool_base_registration_t *regs[RGPUSM_MPOOL_NREGS];
|
||||
int reg_cnt, i;
|
||||
int rc;
|
||||
|
||||
/* Statistic */
|
||||
if(true == mca_mpool_rgpusm_component.print_stats) {
|
||||
opal_output(0, "%s rgpusm: stats "
|
||||
"(hit/valid/invalid/miss/evicted): %d/%d/%d/%d/%d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
mpool_rgpusm->stat_cache_hit, mpool_rgpusm->stat_cache_valid,
|
||||
mpool_rgpusm->stat_cache_invalid, mpool_rgpusm->stat_cache_miss,
|
||||
mpool_rgpusm->stat_evicted);
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
||||
do {
|
||||
reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, 0, (size_t)-1,
|
||||
regs, RGPUSM_MPOOL_NREGS);
|
||||
opal_output(-1, "Registration size at finalize = %d", reg_cnt);
|
||||
|
||||
for(i = 0; i < reg_cnt; i++) {
|
||||
reg = regs[i];
|
||||
|
||||
if(reg->ref_count) {
|
||||
reg->ref_count = 0; /* otherway dereg will fail on assert */
|
||||
} else if (mca_mpool_rgpusm_component.leave_pinned) {
|
||||
opal_list_remove_item(&mpool_rgpusm->lru_list,
|
||||
(opal_list_item_t*)reg);
|
||||
}
|
||||
|
||||
/* Remove from rcache first */
|
||||
mpool->rcache->rcache_delete(mpool->rcache, reg);
|
||||
|
||||
/* Drop lock before deregistering memory */
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
assert(reg->ref_count == 0);
|
||||
rc = mpool_rgpusm->resources.deregister_mem(mpool_rgpusm->resources.reg_data,
|
||||
reg);
|
||||
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
||||
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
/* Potentially lose track of registrations
|
||||
do we have to put it back? */
|
||||
continue;
|
||||
}
|
||||
|
||||
OMPI_FREE_LIST_RETURN(&mpool_rgpusm->reg_list,
|
||||
(ompi_free_list_item_t*)reg);
|
||||
}
|
||||
} while(reg_cnt == RGPUSM_MPOOL_NREGS);
|
||||
|
||||
OBJ_DESTRUCT(&mpool_rgpusm->lru_list);
|
||||
OBJ_DESTRUCT(&mpool_rgpusm->reg_list);
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
|
||||
/* Cleanup any vmas that we have deferred deletion on */
|
||||
mpool->rcache->rcache_clean(mpool->rcache);
|
||||
|
||||
}
|
||||
|
||||
int mca_mpool_rgpusm_ft_event(int state) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
Загрузка…
Ссылка в новой задаче
Block a user