1
1

Merge pull request #4346 from matcabral/psm2_mtl_mq_thread_fix

MTL PSM2: add a thread lock while peeking and completing the psm2 requests.
Этот коммит содержится в:
Ralph Castain 2017-10-24 16:41:29 -05:00 коммит произвёл GitHub
родитель 14a0701949 b81bcd4b0d
Коммит cf3bc4f55b
3 изменённых файлов: 46 добавлений и 38 удалений

Просмотреть файл

@ -406,58 +406,62 @@ int ompi_mtl_psm2_progress( void ) {
int completed = 1; int completed = 1;
do { do {
OPAL_THREAD_LOCK(&mtl_psm2_mq_mutex);
err = psm2_mq_ipeek2(ompi_mtl_psm2.mq, &req, NULL); err = psm2_mq_ipeek2(ompi_mtl_psm2.mq, &req, NULL);
if (err == PSM2_MQ_INCOMPLETE) { if (err == PSM2_MQ_INCOMPLETE) {
return completed; OPAL_THREAD_UNLOCK(&mtl_psm2_mq_mutex);
} else if (err != PSM2_OK) { return completed;
goto error; } else if (OPAL_UNLIKELY(err != PSM2_OK)) {
} OPAL_THREAD_UNLOCK(&mtl_psm2_mq_mutex);
goto error;
}
completed++; err = psm2_mq_test2(&req, &psm2_status);
OPAL_THREAD_UNLOCK(&mtl_psm2_mq_mutex);
err = psm2_mq_test2(&req, &psm2_status); if (OPAL_UNLIKELY (err != PSM2_OK)) {
if (err != PSM2_OK) { goto error;
goto error; }
}
completed++;
mtl_psm2_request = (mca_mtl_psm2_request_t*) psm2_status.context; mtl_psm2_request = (mca_mtl_psm2_request_t*) psm2_status.context;
if (mtl_psm2_request->type == OMPI_mtl_psm2_IRECV) { if (mtl_psm2_request->type == OMPI_mtl_psm2_IRECV) {
mtl_psm2_request->super.ompi_req->req_status.MPI_SOURCE = mtl_psm2_request->super.ompi_req->req_status.MPI_SOURCE =
psm2_status.msg_tag.tag1; psm2_status.msg_tag.tag1;
mtl_psm2_request->super.ompi_req->req_status.MPI_TAG = mtl_psm2_request->super.ompi_req->req_status.MPI_TAG =
psm2_status.msg_tag.tag0; psm2_status.msg_tag.tag0;
mtl_psm2_request->super.ompi_req->req_status._ucount = mtl_psm2_request->super.ompi_req->req_status._ucount =
psm2_status.nbytes; psm2_status.nbytes;
ompi_mtl_datatype_unpack(mtl_psm2_request->convertor, ompi_mtl_datatype_unpack(mtl_psm2_request->convertor,
mtl_psm2_request->buf, mtl_psm2_request->buf,
psm2_status.msg_length); psm2_status.msg_length);
} }
if(mtl_psm2_request->type == OMPI_mtl_psm2_ISEND) { if(mtl_psm2_request->type == OMPI_mtl_psm2_ISEND) {
if (mtl_psm2_request->free_after) { if (mtl_psm2_request->free_after) {
free(mtl_psm2_request->buf); free(mtl_psm2_request->buf);
} }
} }
switch (psm2_status.error_code) { switch (psm2_status.error_code) {
case PSM2_OK: case PSM2_OK:
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR = mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
OMPI_SUCCESS; OMPI_SUCCESS;
break; break;
case PSM2_MQ_TRUNCATION: case PSM2_MQ_TRUNCATION:
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR = mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
MPI_ERR_TRUNCATE; MPI_ERR_TRUNCATE;
break; break;
default: default:
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR = mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
MPI_ERR_INTERN; MPI_ERR_INTERN;
} }
mtl_psm2_request->super.completion_callback(&mtl_psm2_request->super);
mtl_psm2_request->super.completion_callback(&mtl_psm2_request->super);
} }
while (1); while (1);

Просмотреть файл

@ -34,6 +34,8 @@
BEGIN_C_DECLS BEGIN_C_DECLS
/* MPI_THREAD_MULTIPLE_SUPPORT */
extern opal_mutex_t mtl_psm2_mq_mutex;
/* MTL interface functions */ /* MTL interface functions */
extern int ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t* mtl, extern int ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t* mtl,

Просмотреть файл

@ -42,6 +42,8 @@
#include <glob.h> #include <glob.h>
static int param_priority; static int param_priority;
/* MPI_THREAD_MULTIPLE_SUPPORT */
opal_mutex_t mtl_psm2_mq_mutex = OPAL_MUTEX_STATIC_INIT;
static int ompi_mtl_psm2_component_open(void); static int ompi_mtl_psm2_component_open(void);
static int ompi_mtl_psm2_component_close(void); static int ompi_mtl_psm2_component_close(void);