1
1

Add a lock and clean up handling of some error conditions,

This commit was SVN r32258.
Этот коммит содержится в:
Rolf vandeVaart 2014-07-17 19:33:10 +00:00
родитель 0c2aa8abcd
Коммит 947a4e14b4

Просмотреть файл

@ -97,6 +97,7 @@ struct cudaFunctionTable {
int (*cuCtxSetCurrent)(CUcontext); int (*cuCtxSetCurrent)(CUcontext);
int (*cuEventSynchronize)(CUevent); int (*cuEventSynchronize)(CUevent);
int (*cuStreamSynchronize)(CUstream); int (*cuStreamSynchronize)(CUstream);
int (*cuStreamDestroy)(CUstream);
} cudaFunctionTable; } cudaFunctionTable;
typedef struct cudaFunctionTable cudaFunctionTable_t; typedef struct cudaFunctionTable cudaFunctionTable_t;
cudaFunctionTable_t cuFunc; cudaFunctionTable_t cuFunc;
@ -110,10 +111,11 @@ bool mca_common_cuda_enabled = false;
static bool mca_common_cuda_register_memory = true; static bool mca_common_cuda_register_memory = true;
static bool mca_common_cuda_warning = false; static bool mca_common_cuda_warning = false;
static opal_list_t common_cuda_memory_registrations; static opal_list_t common_cuda_memory_registrations;
static CUstream ipcStream; static CUstream ipcStream = NULL;
static CUstream dtohStream; static CUstream dtohStream = NULL;
static CUstream htodStream; static CUstream htodStream = NULL;
static CUstream memcpyStream; static CUstream memcpyStream = NULL;
static opal_mutex_t common_cuda_init_lock;
/* Functions called by opal layer - plugged into opal function table */ /* Functions called by opal layer - plugged into opal function table */
static int mca_common_cuda_is_gpu_buffer(const void*); static int mca_common_cuda_is_gpu_buffer(const void*);
@ -229,6 +231,7 @@ int mca_common_cuda_stage_one_init(void)
return 0; return 0;
} }
stage_one_init_complete = true; stage_one_init_complete = true;
OBJ_CONSTRUCT(&common_cuda_init_lock, opal_mutex_t);
/* Set different levels of verbosity in the cuda related code. */ /* Set different levels of verbosity in the cuda related code. */
mca_common_cuda_verbose = 0; mca_common_cuda_verbose = 0;
@ -472,6 +475,7 @@ int mca_common_cuda_stage_one_init(void)
OMPI_CUDA_DLSYM(libcuda_handle, cuCtxSetCurrent); OMPI_CUDA_DLSYM(libcuda_handle, cuCtxSetCurrent);
OMPI_CUDA_DLSYM(libcuda_handle, cuEventSynchronize); OMPI_CUDA_DLSYM(libcuda_handle, cuEventSynchronize);
OMPI_CUDA_DLSYM(libcuda_handle, cuStreamSynchronize); OMPI_CUDA_DLSYM(libcuda_handle, cuStreamSynchronize);
OMPI_CUDA_DLSYM(libcuda_handle, cuStreamDestroy);
return 0; return 0;
} }
@ -506,26 +510,37 @@ static int mca_common_cuda_stage_two_init(opal_common_cuda_function_table_t *fta
*/ */
static int mca_common_cuda_stage_three_init(void) static int mca_common_cuda_stage_three_init(void)
{ {
int i, s; int i, s, rc;
CUresult res; CUresult res;
CUcontext cuContext; CUcontext cuContext;
common_cuda_mem_regs_t *mem_reg; common_cuda_mem_regs_t *mem_reg;
stage_three_init_complete = true; OPAL_THREAD_LOCK(&common_cuda_init_lock);
opal_output_verbose(20, mca_common_cuda_output, opal_output_verbose(20, mca_common_cuda_output,
"CUDA: entering stage three init"); "CUDA: entering stage three init");
/* Compiled without support or user disabled support */
if (OPAL_UNLIKELY(!ompi_mpi_cuda_support)) { if (OPAL_UNLIKELY(!ompi_mpi_cuda_support)) {
opal_output_verbose(20, mca_common_cuda_output, opal_output_verbose(20, mca_common_cuda_output,
"CUDA: No mpi cuda support, exiting stage three init"); "CUDA: No mpi cuda support, exiting stage three init");
stage_three_init_complete = true;
OPAL_THREAD_UNLOCK(&common_cuda_init_lock);
return OMPI_ERROR; return OMPI_ERROR;
} }
if (OPAL_LIKELY(common_cuda_initialized)) { /* In case another thread snuck in and completed the initialization */
opal_output_verbose(20, mca_common_cuda_output, if (true == stage_three_init_complete) {
"CUDA: Stage three already complete, exiting stage three init"); if (common_cuda_initialized) {
return OMPI_SUCCESS; opal_output_verbose(20, mca_common_cuda_output,
"CUDA: Stage three already complete, exiting stage three init");
OPAL_THREAD_UNLOCK(&common_cuda_init_lock);
return OMPI_SUCCESS;
} else {
opal_output_verbose(20, mca_common_cuda_output,
"CUDA: Stage three already complete, failed during the init");
OPAL_THREAD_UNLOCK(&common_cuda_init_lock);
return OMPI_ERROR;
}
} }
/* Check to see if this process is running in a CUDA context. If /* Check to see if this process is running in a CUDA context. If
@ -566,6 +581,7 @@ static int mca_common_cuda_stage_three_init(void)
* CUDA support. * CUDA support.
*/ */
if (false == mca_common_cuda_enabled) { if (false == mca_common_cuda_enabled) {
OPAL_THREAD_UNLOCK(&common_cuda_init_lock);
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -578,11 +594,12 @@ static int mca_common_cuda_stage_three_init(void)
cuda_event_ipc_first_avail = 0; cuda_event_ipc_first_avail = 0;
cuda_event_ipc_first_used = 0; cuda_event_ipc_first_used = 0;
cuda_event_ipc_array = (CUevent *) malloc(sizeof(CUevent) * cuda_event_max); cuda_event_ipc_array = (CUevent *) calloc(cuda_event_max, sizeof(CUevent *));
if (NULL == cuda_event_ipc_array) { if (NULL == cuda_event_ipc_array) {
opal_show_help("help-mpi-common-cuda.txt", "No memory", opal_show_help("help-mpi-common-cuda.txt", "No memory",
true, ompi_process_info.nodename); true, ompi_process_info.nodename);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
/* Create the events since they can be reused. */ /* Create the events since they can be reused. */
@ -591,7 +608,8 @@ static int mca_common_cuda_stage_three_init(void)
if (CUDA_SUCCESS != res) { if (CUDA_SUCCESS != res) {
opal_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed", opal_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
true, ompi_process_info.nodename, res); true, ompi_process_info.nodename, res);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
} }
@ -602,7 +620,8 @@ static int mca_common_cuda_stage_three_init(void)
if (NULL == cuda_event_ipc_frag_array) { if (NULL == cuda_event_ipc_frag_array) {
opal_show_help("help-mpi-common-cuda.txt", "No memory", opal_show_help("help-mpi-common-cuda.txt", "No memory",
true, ompi_process_info.nodename); true, ompi_process_info.nodename);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
} }
@ -616,11 +635,12 @@ static int mca_common_cuda_stage_three_init(void)
cuda_event_dtoh_first_avail = 0; cuda_event_dtoh_first_avail = 0;
cuda_event_dtoh_first_used = 0; cuda_event_dtoh_first_used = 0;
cuda_event_dtoh_array = (CUevent *) malloc(sizeof(CUevent) * cuda_event_max); cuda_event_dtoh_array = (CUevent *) calloc(cuda_event_max, sizeof(CUevent *));
if (NULL == cuda_event_dtoh_array) { if (NULL == cuda_event_dtoh_array) {
opal_show_help("help-mpi-common-cuda.txt", "No memory", opal_show_help("help-mpi-common-cuda.txt", "No memory",
true, ompi_process_info.nodename); true, ompi_process_info.nodename);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
/* Create the events since they can be reused. */ /* Create the events since they can be reused. */
@ -629,7 +649,8 @@ static int mca_common_cuda_stage_three_init(void)
if (CUDA_SUCCESS != res) { if (CUDA_SUCCESS != res) {
opal_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed", opal_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
true, ompi_process_info.nodename, res); true, ompi_process_info.nodename, res);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
} }
@ -640,7 +661,8 @@ static int mca_common_cuda_stage_three_init(void)
if (NULL == cuda_event_dtoh_frag_array) { if (NULL == cuda_event_dtoh_frag_array) {
opal_show_help("help-mpi-common-cuda.txt", "No memory", opal_show_help("help-mpi-common-cuda.txt", "No memory",
true, ompi_process_info.nodename); true, ompi_process_info.nodename);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
/* Set up an array to store outstanding async htod events. Used on the /* Set up an array to store outstanding async htod events. Used on the
@ -651,11 +673,12 @@ static int mca_common_cuda_stage_three_init(void)
cuda_event_htod_first_avail = 0; cuda_event_htod_first_avail = 0;
cuda_event_htod_first_used = 0; cuda_event_htod_first_used = 0;
cuda_event_htod_array = (CUevent *) malloc(sizeof(CUevent) * cuda_event_max); cuda_event_htod_array = (CUevent *) calloc(cuda_event_max, sizeof(CUevent *));
if (NULL == cuda_event_htod_array) { if (NULL == cuda_event_htod_array) {
opal_show_help("help-mpi-common-cuda.txt", "No memory", opal_show_help("help-mpi-common-cuda.txt", "No memory",
true, ompi_process_info.nodename); true, ompi_process_info.nodename);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
/* Create the events since they can be reused. */ /* Create the events since they can be reused. */
@ -664,7 +687,8 @@ static int mca_common_cuda_stage_three_init(void)
if (CUDA_SUCCESS != res) { if (CUDA_SUCCESS != res) {
opal_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed", opal_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
true, ompi_process_info.nodename, res); true, ompi_process_info.nodename, res);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
} }
@ -675,7 +699,8 @@ static int mca_common_cuda_stage_three_init(void)
if (NULL == cuda_event_htod_frag_array) { if (NULL == cuda_event_htod_frag_array) {
opal_show_help("help-mpi-common-cuda.txt", "No memory", opal_show_help("help-mpi-common-cuda.txt", "No memory",
true, ompi_process_info.nodename); true, ompi_process_info.nodename);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
} }
@ -707,7 +732,8 @@ static int mca_common_cuda_stage_three_init(void)
if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) { if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) {
opal_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed", opal_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
true, ompi_process_info.nodename, res); true, ompi_process_info.nodename, res);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
/* Create stream for use in dtoh asynchronous copies */ /* Create stream for use in dtoh asynchronous copies */
@ -715,8 +741,8 @@ static int mca_common_cuda_stage_three_init(void)
if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) { if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) {
opal_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed", opal_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
true, ompi_process_info.nodename, res); true, ompi_process_info.nodename, res);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
/* Create stream for use in htod asynchronous copies */ /* Create stream for use in htod asynchronous copies */
@ -724,8 +750,8 @@ static int mca_common_cuda_stage_three_init(void)
if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) { if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) {
opal_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed", opal_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
true, ompi_process_info.nodename, res); true, ompi_process_info.nodename, res);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
if (mca_common_cuda_cumemcpy_async) { if (mca_common_cuda_cumemcpy_async) {
@ -734,14 +760,64 @@ static int mca_common_cuda_stage_three_init(void)
if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) { if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) {
opal_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed", opal_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
true, ompi_process_info.nodename, res); true, ompi_process_info.nodename, res);
return OMPI_ERROR; rc = OMPI_ERROR;
goto cleanup_and_error;
} }
} }
opal_output_verbose(30, mca_common_cuda_output, opal_output_verbose(30, mca_common_cuda_output,
"CUDA: initialized"); "CUDA: initialized");
common_cuda_initialized = true; common_cuda_initialized = true;
stage_three_init_complete = true;
OPAL_THREAD_UNLOCK(&common_cuda_init_lock);
return OMPI_SUCCESS; return OMPI_SUCCESS;
/* If we are here, something went wrong. Cleanup and return an error. */
cleanup_and_error:
for (i = 0; i < cuda_event_max; i++) {
if (NULL != cuda_event_ipc_array[i]) {
cuFunc.cuEventDestroy(cuda_event_ipc_array[i]);
}
if (NULL != cuda_event_htod_array[i]) {
cuFunc.cuEventDestroy(cuda_event_htod_array[i]);
}
if (NULL != cuda_event_dtoh_array[i]) {
cuFunc.cuEventDestroy(cuda_event_dtoh_array[i]);
}
}
if (NULL != cuda_event_ipc_array) {
free(cuda_event_ipc_array);
}
if (NULL != cuda_event_htod_array) {
free(cuda_event_htod_array);
}
if (NULL != cuda_event_dtoh_array) {
free(cuda_event_dtoh_array);
}
if (NULL != cuda_event_ipc_frag_array) {
free(cuda_event_ipc_frag_array);
}
if (NULL != cuda_event_htod_frag_array) {
free(cuda_event_ipc_frag_array);
}
if (NULL != cuda_event_dtoh_frag_array) {
free(cuda_event_dtoh_frag_array);
}
if (NULL != ipcStream) {
cuFunc.cuStreamDestroy(ipcStream);
}
if (NULL != dtohStream) {
cuFunc.cuStreamDestroy(dtohStream);
}
if (NULL != htodStream) {
cuFunc.cuStreamDestroy(htodStream);
}
if (NULL != memcpyStream) {
cuFunc.cuStreamDestroy(memcpyStream);
}
stage_three_init_complete = true;
OPAL_THREAD_UNLOCK(&common_cuda_init_lock);
return rc;
} }