Improve error message to help user figure out what to do
Этот коммит содержится в:
родитель
65a279019e
Коммит
66f6026214
@ -1107,7 +1107,7 @@ int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *n
|
||||
}
|
||||
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
|
||||
opal_show_help("help-mpi-common-cuda.txt", "cuIpcOpenMemHandle failed",
|
||||
- true, result, base);
+ true, OPAL_PROC_MY_HOSTNAME, result, base);
|
||||
/* Currently, this is a non-recoverable error */
|
||||
return OPAL_ERROR;
|
||||
} else {
|
||||
|
@ -80,10 +80,12 @@ Rerun with --mca mpi_common_cuda_event_max %d
|
||||
[cuIpcOpenMemHandle failed]
|
||||
The call to cuIpcOpenMemHandle failed. This is an unrecoverable error
|
||||
and will cause the program to abort.
|
||||
- cuIpcOpenMemHandle return value: %d
- address: %p
- Check the cuda.h file for what the return value means. Perhaps a reboot
- of the node will clear the problem.
+ Hostname: %s
+ cuIpcOpenMemHandle return value: %d
+ address: %p
+ Check the cuda.h file for what the return value means. A possible cause
+ for this is not enough free device memory. Try to reduce the device
+ memory footprint of your application.
|
||||
#
|
||||
[cuIpcCloseMemHandle failed]
|
||||
The call to cuIpcCloseMemHandle failed. This is a warning and the program
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user