1
1

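Add an extra check that the InfiniBand driver supports GPU Direct RDMA (GDR). Rename the GDR-related MCA parameters (have_cuda_gdr_support -> have_cuda_gdr, cuda_want_gdr_support -> want_cuda_gdr) and replace opal_output with opal_show_help.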

This commit was SVN r29853.
Этот коммит содержится в:
Rolf vandeVaart 2013-12-10 16:04:08 +00:00
родитель 0f61bb651e
Коммит 1cc55f305f
3 изменённых файлов: 40 добавлений и 3 удалений

Просмотреть файл

@ -315,6 +315,7 @@ struct mca_btl_openib_component_t {
bool cuda_async_send; bool cuda_async_send;
bool cuda_async_recv; bool cuda_async_recv;
bool cuda_have_gdr; bool cuda_have_gdr;
bool driver_have_gdr;
bool cuda_want_gdr; bool cuda_want_gdr;
#endif /* OPAL_CUDA_SUPPORT */ #endif /* OPAL_CUDA_SUPPORT */
#if HAVE_DECL_IBV_LINK_LAYER_ETHERNET #if HAVE_DECL_IBV_LINK_LAYER_ETHERNET

Просмотреть файл

@ -29,6 +29,7 @@
#include "opal/util/bit_ops.h" #include "opal/util/bit_ops.h"
#include "opal/mca/installdirs/installdirs.h" #include "opal/mca/installdirs/installdirs.h"
#include "opal/util/os_dirpath.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "opal/util/show_help.h" #include "opal/util/show_help.h"
#include "btl_openib.h" #include "btl_openib.h"
@ -583,7 +584,7 @@ int btl_openib_register_mca_params(void)
/* Indicates if library was built with GPU Direct RDMA support. Not changeable. */ /* Indicates if library was built with GPU Direct RDMA support. Not changeable. */
mca_btl_openib_component.cuda_have_gdr = OPAL_INT_TO_BOOL(OPAL_CUDA_GDR_SUPPORT); mca_btl_openib_component.cuda_have_gdr = OPAL_INT_TO_BOOL(OPAL_CUDA_GDR_SUPPORT);
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version, "have_cuda_gdr_support", (void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version, "have_cuda_gdr",
"Whether CUDA GPU Direct RDMA support is built into library or not", "Whether CUDA GPU Direct RDMA support is built into library or not",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY, MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
@ -591,14 +592,36 @@ int btl_openib_register_mca_params(void)
MCA_BASE_VAR_SCOPE_CONSTANT, MCA_BASE_VAR_SCOPE_CONSTANT,
&mca_btl_openib_component.cuda_have_gdr); &mca_btl_openib_component.cuda_have_gdr);
/* Indicates if driver has GPU Direct RDMA support. Not changeable. */
if (OPAL_SUCCESS == opal_os_dirpath_access("/sys/kernel/mm/memory_peers/nv_mem/version", S_IRUSR)) {
mca_btl_openib_component.driver_have_gdr = 1;
} else {
mca_btl_openib_component.driver_have_gdr = 0;
}
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version, "have_driver_gdr",
"Whether Infiniband driver has GPU Direct RDMA support",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_4,
MCA_BASE_VAR_SCOPE_CONSTANT,
&mca_btl_openib_component.driver_have_gdr);
/* Default for GPU Direct RDMA is off for now */ /* Default for GPU Direct RDMA is off for now */
CHECK(reg_bool("cuda_want_gdr_support", NULL, CHECK(reg_bool("want_cuda_gdr", NULL,
"Enable or disable CUDA GPU Direct RDMA support " "Enable or disable CUDA GPU Direct RDMA support "
"(true = yes; false = no)", "(true = yes; false = no)",
false, &mca_btl_openib_component.cuda_want_gdr)); false, &mca_btl_openib_component.cuda_want_gdr));
if (mca_btl_openib_component.cuda_want_gdr && !mca_btl_openib_component.cuda_have_gdr) { if (mca_btl_openib_component.cuda_want_gdr && !mca_btl_openib_component.cuda_have_gdr) {
opal_output(0, "GDR support requested but library does not have it built in."); opal_show_help("help-mpi-btl-openib.txt",
"CUDA_no_gdr_support", true,
ompi_process_info.nodename);
return OMPI_ERROR;
}
if (mca_btl_openib_component.cuda_want_gdr && !mca_btl_openib_component.driver_have_gdr) {
opal_show_help("help-mpi-btl-openib.txt",
"driver_no_gdr_support", true,
ompi_process_info.nodename);
return OMPI_ERROR; return OMPI_ERROR;
} }
#if OPAL_CUDA_GDR_SUPPORT #if OPAL_CUDA_GDR_SUPPORT

Просмотреть файл

@ -13,6 +13,7 @@
# Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved. # Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved.
# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
# Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
# Additional copyrights may follow # Additional copyrights may follow
@ -710,3 +711,15 @@ parameters:
Total memory: %lu MiB Total memory: %lu MiB
%s %s
[CUDA_no_gdr_support]
You requested to run with CUDA GPU Direct RDMA support but the Open MPI
library was not built with that support. The Open MPI library must be
configured with CUDA 6.0 or later.
Local host: %s
[driver_no_gdr_support]
You requested to run with CUDA GPU Direct RDMA support but this OFED
installation does not have that support. Contact Mellanox to figure
out how to get an OFED stack with that support.
Local host: %s