1
1

Add a lock and clean up handling of some error conditions,

This commit was SVN r32258.
Этот коммит содержится в:
Rolf vandeVaart 2014-07-17 19:33:10 +00:00
родитель 0c2aa8abcd
Коммит 947a4e14b4

Просмотреть файл

@ -97,6 +97,7 @@ struct cudaFunctionTable {
int (*cuCtxSetCurrent)(CUcontext);
int (*cuEventSynchronize)(CUevent);
int (*cuStreamSynchronize)(CUstream);
int (*cuStreamDestroy)(CUstream);
} cudaFunctionTable;
typedef struct cudaFunctionTable cudaFunctionTable_t;
cudaFunctionTable_t cuFunc;
@ -110,10 +111,11 @@ bool mca_common_cuda_enabled = false;
static bool mca_common_cuda_register_memory = true;
static bool mca_common_cuda_warning = false;
static opal_list_t common_cuda_memory_registrations;
static CUstream ipcStream;
static CUstream dtohStream;
static CUstream htodStream;
static CUstream memcpyStream;
static CUstream ipcStream = NULL;
static CUstream dtohStream = NULL;
static CUstream htodStream = NULL;
static CUstream memcpyStream = NULL;
static opal_mutex_t common_cuda_init_lock;
/* Functions called by opal layer - plugged into opal function table */
static int mca_common_cuda_is_gpu_buffer(const void*);
@ -229,6 +231,7 @@ int mca_common_cuda_stage_one_init(void)
return 0;
}
stage_one_init_complete = true;
OBJ_CONSTRUCT(&common_cuda_init_lock, opal_mutex_t);
/* Set different levels of verbosity in the cuda related code. */
mca_common_cuda_verbose = 0;
@ -472,6 +475,7 @@ int mca_common_cuda_stage_one_init(void)
OMPI_CUDA_DLSYM(libcuda_handle, cuCtxSetCurrent);
OMPI_CUDA_DLSYM(libcuda_handle, cuEventSynchronize);
OMPI_CUDA_DLSYM(libcuda_handle, cuStreamSynchronize);
OMPI_CUDA_DLSYM(libcuda_handle, cuStreamDestroy);
return 0;
}
@ -506,26 +510,37 @@ static int mca_common_cuda_stage_two_init(opal_common_cuda_function_table_t *fta
*/
static int mca_common_cuda_stage_three_init(void)
{
int i, s;
int i, s, rc;
CUresult res;
CUcontext cuContext;
common_cuda_mem_regs_t *mem_reg;
stage_three_init_complete = true;
OPAL_THREAD_LOCK(&common_cuda_init_lock);
opal_output_verbose(20, mca_common_cuda_output,
"CUDA: entering stage three init");
/* Compiled without support or user disabled support */
if (OPAL_UNLIKELY(!ompi_mpi_cuda_support)) {
opal_output_verbose(20, mca_common_cuda_output,
"CUDA: No mpi cuda support, exiting stage three init");
stage_three_init_complete = true;
OPAL_THREAD_UNLOCK(&common_cuda_init_lock);
return OMPI_ERROR;
}
if (OPAL_LIKELY(common_cuda_initialized)) {
opal_output_verbose(20, mca_common_cuda_output,
"CUDA: Stage three already complete, exiting stage three init");
return OMPI_SUCCESS;
/* In case another thread snuck in and completed the initialization */
if (true == stage_three_init_complete) {
if (common_cuda_initialized) {
opal_output_verbose(20, mca_common_cuda_output,
"CUDA: Stage three already complete, exiting stage three init");
OPAL_THREAD_UNLOCK(&common_cuda_init_lock);
return OMPI_SUCCESS;
} else {
opal_output_verbose(20, mca_common_cuda_output,
"CUDA: Stage three already complete, failed during the init");
OPAL_THREAD_UNLOCK(&common_cuda_init_lock);
return OMPI_ERROR;
}
}
/* Check to see if this process is running in a CUDA context. If
@ -566,6 +581,7 @@ static int mca_common_cuda_stage_three_init(void)
* CUDA support.
*/
if (false == mca_common_cuda_enabled) {
OPAL_THREAD_UNLOCK(&common_cuda_init_lock);
return OMPI_ERROR;
}
@ -578,11 +594,12 @@ static int mca_common_cuda_stage_three_init(void)
cuda_event_ipc_first_avail = 0;
cuda_event_ipc_first_used = 0;
cuda_event_ipc_array = (CUevent *) malloc(sizeof(CUevent) * cuda_event_max);
cuda_event_ipc_array = (CUevent *) calloc(cuda_event_max, sizeof(CUevent *));
if (NULL == cuda_event_ipc_array) {
opal_show_help("help-mpi-common-cuda.txt", "No memory",
true, ompi_process_info.nodename);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
/* Create the events since they can be reused. */
@ -591,7 +608,8 @@ static int mca_common_cuda_stage_three_init(void)
if (CUDA_SUCCESS != res) {
opal_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
true, ompi_process_info.nodename, res);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
}
@ -602,7 +620,8 @@ static int mca_common_cuda_stage_three_init(void)
if (NULL == cuda_event_ipc_frag_array) {
opal_show_help("help-mpi-common-cuda.txt", "No memory",
true, ompi_process_info.nodename);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
}
@ -616,11 +635,12 @@ static int mca_common_cuda_stage_three_init(void)
cuda_event_dtoh_first_avail = 0;
cuda_event_dtoh_first_used = 0;
cuda_event_dtoh_array = (CUevent *) malloc(sizeof(CUevent) * cuda_event_max);
cuda_event_dtoh_array = (CUevent *) calloc(cuda_event_max, sizeof(CUevent *));
if (NULL == cuda_event_dtoh_array) {
opal_show_help("help-mpi-common-cuda.txt", "No memory",
true, ompi_process_info.nodename);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
/* Create the events since they can be reused. */
@ -629,7 +649,8 @@ static int mca_common_cuda_stage_three_init(void)
if (CUDA_SUCCESS != res) {
opal_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
true, ompi_process_info.nodename, res);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
}
@ -640,7 +661,8 @@ static int mca_common_cuda_stage_three_init(void)
if (NULL == cuda_event_dtoh_frag_array) {
opal_show_help("help-mpi-common-cuda.txt", "No memory",
true, ompi_process_info.nodename);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
/* Set up an array to store outstanding async htod events. Used on the
@ -651,11 +673,12 @@ static int mca_common_cuda_stage_three_init(void)
cuda_event_htod_first_avail = 0;
cuda_event_htod_first_used = 0;
cuda_event_htod_array = (CUevent *) malloc(sizeof(CUevent) * cuda_event_max);
cuda_event_htod_array = (CUevent *) calloc(cuda_event_max, sizeof(CUevent *));
if (NULL == cuda_event_htod_array) {
opal_show_help("help-mpi-common-cuda.txt", "No memory",
true, ompi_process_info.nodename);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
/* Create the events since they can be reused. */
@ -664,7 +687,8 @@ static int mca_common_cuda_stage_three_init(void)
if (CUDA_SUCCESS != res) {
opal_show_help("help-mpi-common-cuda.txt", "cuEventCreate failed",
true, ompi_process_info.nodename, res);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
}
@ -675,7 +699,8 @@ static int mca_common_cuda_stage_three_init(void)
if (NULL == cuda_event_htod_frag_array) {
opal_show_help("help-mpi-common-cuda.txt", "No memory",
true, ompi_process_info.nodename);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
}
@ -707,7 +732,8 @@ static int mca_common_cuda_stage_three_init(void)
if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) {
opal_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
true, ompi_process_info.nodename, res);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
/* Create stream for use in dtoh asynchronous copies */
@ -715,8 +741,8 @@ static int mca_common_cuda_stage_three_init(void)
if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) {
opal_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
true, ompi_process_info.nodename, res);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
/* Create stream for use in htod asynchronous copies */
@ -724,8 +750,8 @@ static int mca_common_cuda_stage_three_init(void)
if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) {
opal_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
true, ompi_process_info.nodename, res);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
if (mca_common_cuda_cumemcpy_async) {
@ -734,14 +760,64 @@ static int mca_common_cuda_stage_three_init(void)
if (OPAL_UNLIKELY(res != CUDA_SUCCESS)) {
opal_show_help("help-mpi-common-cuda.txt", "cuStreamCreate failed",
true, ompi_process_info.nodename, res);
return OMPI_ERROR;
rc = OMPI_ERROR;
goto cleanup_and_error;
}
}
opal_output_verbose(30, mca_common_cuda_output,
"CUDA: initialized");
common_cuda_initialized = true;
stage_three_init_complete = true;
OPAL_THREAD_UNLOCK(&common_cuda_init_lock);
return OMPI_SUCCESS;
/* If we are here, something went wrong. Cleanup and return an error. */
cleanup_and_error:
for (i = 0; i < cuda_event_max; i++) {
if (NULL != cuda_event_ipc_array[i]) {
cuFunc.cuEventDestroy(cuda_event_ipc_array[i]);
}
if (NULL != cuda_event_htod_array[i]) {
cuFunc.cuEventDestroy(cuda_event_htod_array[i]);
}
if (NULL != cuda_event_dtoh_array[i]) {
cuFunc.cuEventDestroy(cuda_event_dtoh_array[i]);
}
}
if (NULL != cuda_event_ipc_array) {
free(cuda_event_ipc_array);
}
if (NULL != cuda_event_htod_array) {
free(cuda_event_htod_array);
}
if (NULL != cuda_event_dtoh_array) {
free(cuda_event_dtoh_array);
}
if (NULL != cuda_event_ipc_frag_array) {
free(cuda_event_ipc_frag_array);
}
if (NULL != cuda_event_htod_frag_array) {
free(cuda_event_ipc_frag_array);
}
if (NULL != cuda_event_dtoh_frag_array) {
free(cuda_event_dtoh_frag_array);
}
if (NULL != ipcStream) {
cuFunc.cuStreamDestroy(ipcStream);
}
if (NULL != dtohStream) {
cuFunc.cuStreamDestroy(dtohStream);
}
if (NULL != htodStream) {
cuFunc.cuStreamDestroy(htodStream);
}
if (NULL != memcpyStream) {
cuFunc.cuStreamDestroy(memcpyStream);
}
stage_three_init_complete = true;
OPAL_THREAD_UNLOCK(&common_cuda_init_lock);
return rc;
}