OSHMEM: fix infinite recursion and stack size violation
send reply before posting the receive request again to limit the recursion size to number of receive requests. send can call opal_progress which calls this function again. If recv req is started stack size will be proportional to number of job ranks.
Этот коммит содержится в:
родитель
e732152304
Коммит
54a072caaa
@ -340,6 +340,7 @@ static inline int my_MPI_Test(ompi_request_t ** rptr,
|
||||
ompi_request_t *request = *rptr;
|
||||
|
||||
assert(request->req_persistent);
|
||||
assert(request->req_state != OMPI_REQUEST_INACTIVE);
|
||||
|
||||
if (request->req_complete) {
|
||||
int old_error;
|
||||
@ -371,7 +372,7 @@ static int oshmem_mkey_recv_cb(void)
|
||||
n = 0;
|
||||
r = (oob_comm_request_t *)opal_list_get_first(&memheap_oob.req_list);
|
||||
assert(r);
|
||||
while (1) {
|
||||
while(r != opal_list_get_end(&memheap_oob.req_list)) {
|
||||
my_MPI_Test(&r->recv_req, &flag, &status);
|
||||
if (OPAL_LIKELY(0 == flag)) {
|
||||
return n;
|
||||
@ -400,6 +401,15 @@ static int oshmem_mkey_recv_cb(void)
|
||||
}
|
||||
opal_dss.load(msg, (void*)tmp_buf, size);
|
||||
|
||||
/*
|
||||
* send reply before posting the receive request again to limit the recursion size to
|
||||
* number of receive requests.
|
||||
* send can call opal_progress which calls this function again. If recv req is started
|
||||
* stack size will be proportional to number of job ranks.
|
||||
*/
|
||||
do_recv(status.MPI_SOURCE, msg);
|
||||
OBJ_RELEASE(msg);
|
||||
|
||||
rc = MPI_Start(&r->recv_req);
|
||||
if (MPI_SUCCESS != rc) {
|
||||
MEMHEAP_ERROR("Failed to post recv request %d", rc);
|
||||
@ -408,8 +418,6 @@ static int oshmem_mkey_recv_cb(void)
|
||||
}
|
||||
opal_list_append(&memheap_oob.req_list, &r->super);
|
||||
|
||||
do_recv(status.MPI_SOURCE, msg);
|
||||
OBJ_RELEASE(msg);
|
||||
|
||||
r = (oob_comm_request_t *)opal_list_get_first(&memheap_oob.req_list);
|
||||
assert(r);
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user