1
1

Merge pull request #6844 from adrianreber/check_for_user_ns

Do not use CMA in user namespaces
Этот коммит содержится в:
Jeff Squyres 2019-09-20 22:10:42 -04:00 коммит произвёл GitHub
родитель a7da93f88c fc68d8a90f
Коммит 8038fac8f9
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 127 добавлений и 5 удалений

Просмотреть файл

@ -84,7 +84,12 @@ union vader_modex_t {
void *segment_base;
} xpmem;
#endif
opal_shmem_ds_t seg_ds;
/*
 * Modex payload filled in by mca_btl_base_vader_modex_send() on the branch
 * that does not use the xpmem member of this union.
 */
struct vader_modex_other_t {
/* sender's user namespace ID as returned by mca_btl_vader_get_user_ns_id();
 * '0' means the sender could not determine it */
ino_t user_ns_id;
/* number of bytes of seg_ds that are valid, as reported by
 * opal_shmem_sizeof_shmem_ds() on the sender; the receiver uses this value
 * to size its private copy of seg_ds */
int seg_ds_size;
/* seg_ds needs to be the last element */
/* NOTE(review): the reason for the ordering constraint is not visible in
 * this chunk -- confirm before adding fields after seg_ds */
opal_shmem_ds_t seg_ds;
} other;
};
/**
@ -270,6 +275,8 @@ int mca_btl_vader_get_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif
ino_t mca_btl_vader_get_user_ns_id(void);
int mca_btl_vader_get_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,

Просмотреть файл

@ -42,6 +42,10 @@
#include "btl_vader_fbox.h"
#include "btl_vader_xpmem.h"
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#include <sys/mman.h>
#include <fcntl.h>
@ -351,6 +355,25 @@ static int mca_btl_vader_component_close(void)
return OPAL_SUCCESS;
}
/*
 * mca_btl_vader_get_user_ns_id() looks up the user namespace of the
 * calling process by stat()ing "/proc/self/ns/user".
 *
 * Returns the inode number reported for that path, which serves as the
 * user namespace ID. In the case of an error '0' is returned.
 */
ino_t mca_btl_vader_get_user_ns_id(void)
{
    struct stat ns_stat;
    ino_t ns_id = 0;

    /*
     * A failing stat() most likely means an old kernel that does not
     * support namespaces; keep the '0' default in that case so that all
     * processes are assumed to live in the same user namespace.
     */
    if (stat("/proc/self/ns/user", &ns_stat) >= 0) {
        ns_id = ns_stat.st_ino;
    }

    return ns_id;
}
static int mca_btl_base_vader_modex_send (void)
{
union vader_modex_t modex;
@ -364,8 +387,16 @@ static int mca_btl_base_vader_modex_send (void)
modex_size = sizeof (modex.xpmem);
} else {
#endif
modex_size = opal_shmem_sizeof_shmem_ds (&mca_btl_vader_component.seg_ds);
memmove (&modex.seg_ds, &mca_btl_vader_component.seg_ds, modex_size);
modex.other.seg_ds_size = opal_shmem_sizeof_shmem_ds (&mca_btl_vader_component.seg_ds);
memmove (&modex.other.seg_ds, &mca_btl_vader_component.seg_ds, modex.other.seg_ds_size);
modex.other.user_ns_id = mca_btl_vader_get_user_ns_id();
/*
* If modex.other.user_ns_id is '0' something did not work out
* during user namespace detection. Assuming there are no
* namespaces available it will return '0' for all processes and
* the check later will see '0' everywhere and not disable CMA.
*/
modex_size = sizeof (modex.other);
#if OPAL_BTL_VADER_HAVE_XPMEM
}

Просмотреть файл

@ -27,6 +27,7 @@
*/
#include "opal_config.h"
#include "opal/util/show_help.h"
#include "btl_vader.h"
#include "btl_vader_endpoint.h"
@ -79,6 +80,28 @@ mca_btl_vader_t mca_btl_vader = {
}
};
/*
 * Abort helper, copied from the exit function in btl_usnic_util.c.
 *
 * Reports a fatal error through the BTL's error callback when one is
 * registered, then prints a message to stderr and terminates the process
 * with exit(1).
 *
 * The annotation below tells Coverity that this function does not return.
 * See https://scan.coverity.com/tune.
 */
/* coverity[+kill] */
static void vader_btl_exit(mca_btl_vader_t *btl)
{
    /* Hand the fatal error to the registered error callback first, if any. */
    if (NULL != btl) {
        if (NULL != btl->error_cb) {
            btl->error_cb(&btl->super, MCA_BTL_ERROR_FLAGS_FATAL,
                          (opal_proc_t*) opal_proc_local_get(),
                          "The vader BTL is aborting the MPI job (via PML error callback).");
        }
    }

    /* Either there was no callback or it returned: fall back to a plain exit. */
    fputs("*** The Open MPI vader BTL is aborting the MPI job (via exit(3)).\n", stderr);
    fflush(stderr);
    exit(1);
}
static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
{
mca_btl_vader_component_t *component = &mca_btl_vader_component;
@ -173,6 +196,7 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_proc_t *proc, int remote_rank) {
mca_btl_vader_component_t *component = &mca_btl_vader_component;
union vader_modex_t *modex;
ino_t my_user_ns_id;
size_t msg_size;
int rc;
@ -197,17 +221,58 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_
} else {
#endif
/* store a copy of the segment information for detach */
ep->segment_data.other.seg_ds = malloc (msg_size);
ep->segment_data.other.seg_ds = malloc (modex->other.seg_ds_size);
if (NULL == ep->segment_data.other.seg_ds) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
memcpy (ep->segment_data.other.seg_ds, &modex->seg_ds, msg_size);
memcpy (ep->segment_data.other.seg_ds, &modex->other.seg_ds, modex->other.seg_ds_size);
ep->segment_base = opal_shmem_segment_attach (ep->segment_data.other.seg_ds);
if (NULL == ep->segment_base) {
return OPAL_ERROR;
}
if (MCA_BTL_VADER_CMA == mca_btl_vader_component.single_copy_mechanism) {
my_user_ns_id = mca_btl_vader_get_user_ns_id();
if (my_user_ns_id != modex->other.user_ns_id) {
mca_base_var_source_t source;
int vari;
rc = mca_base_var_find_by_name("btl_vader_single_copy_mechanism", &vari);
if (OPAL_ERROR == rc) {
return OPAL_ERROR;
}
rc = mca_base_var_get_value(vari, NULL, &source, NULL);
if (OPAL_ERROR == rc) {
return OPAL_ERROR;
}
/*
* CMA is not possible as different user namespaces are in use.
* Currently the kernel does not allow process_vm_{read,write}v()
* for processes running in different user namespaces even if
* all involved user IDs are mapped to the same user ID.
*
* Fallback to MCA_BTL_VADER_EMUL.
*/
if (MCA_BASE_VAR_SOURCE_DEFAULT != source) {
/* If CMA has been explicitly selected we want to error out */
opal_show_help("help-btl-vader.txt", "cma-different-user-namespace-error",
true, opal_process_info.nodename);
vader_btl_exit(&mca_btl_vader);
}
/*
* If CMA has been selected because it is the default or
* some fallback, this falls back even further.
*/
opal_show_help("help-btl-vader.txt", "cma-different-user-namespace-warning",
true, opal_process_info.nodename);
mca_btl_vader_component.single_copy_mechanism = MCA_BTL_VADER_EMUL;
mca_btl_vader.super.btl_get = mca_btl_vader_get_sc_emu;
mca_btl_vader.super.btl_put = mca_btl_vader_put_sc_emu;
mca_btl_vader.super.btl_put_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
mca_btl_vader.super.btl_get_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
}
}
#if OPAL_BTL_VADER_HAVE_XPMEM
}
#endif

Просмотреть файл

@ -121,6 +121,25 @@ WARNING: Linux kernel CMA support was requested via the
btl_vader_single_copy_mechanism MCA variable, but CMA support is
not available due to restrictive ptrace settings.
The vader shared memory BTL will fall back on another single-copy
mechanism if one is available. This may result in lower performance.
Local host: %s
#
[cma-different-user-namespace-error]
ERROR: Linux kernel CMA support was requested via the
btl_vader_single_copy_mechanism MCA variable, but CMA support is
not available due to different user namespaces.
Your MPI job will abort now. Please select another value for
btl_vader_single_copy_mechanism.
Local host: %s
#
[cma-different-user-namespace-warning]
WARNING: The default btl_vader_single_copy_mechanism CMA is
not available due to different user namespaces.
The vader shared memory BTL will fall back on another single-copy
mechanism if one is available. This may result in lower performance.