1
1

OSHMEM: fix infinite recursion and stack size violation

send reply before posting the receive request again to limit the recursion size to
number of receive requests.
send can call opal_progress which calls this function again. If recv req is started
stack size will be proportional to number of job ranks.
Этот коммит содержится в:
Mike Dubman 2014-12-31 14:52:10 +02:00
родитель e732152304
Коммит 54a072caaa

Просмотреть файл

@ -340,6 +340,7 @@ static inline int my_MPI_Test(ompi_request_t ** rptr,
ompi_request_t *request = *rptr;
assert(request->req_persistent);
assert(request->req_state != OMPI_REQUEST_INACTIVE);
if (request->req_complete) {
int old_error;
@ -371,7 +372,7 @@ static int oshmem_mkey_recv_cb(void)
n = 0;
r = (oob_comm_request_t *)opal_list_get_first(&memheap_oob.req_list);
assert(r);
while (1) {
while(r != opal_list_get_end(&memheap_oob.req_list)) {
my_MPI_Test(&r->recv_req, &flag, &status);
if (OPAL_LIKELY(0 == flag)) {
return n;
@ -400,6 +401,15 @@ static int oshmem_mkey_recv_cb(void)
}
opal_dss.load(msg, (void*)tmp_buf, size);
/*
* send reply before posting the receive request again to limit the recursion size to
* number of receive requests.
* send can call opal_progress which calls this function again. If recv req is started
* stack size will be proportional to number of job ranks.
*/
do_recv(status.MPI_SOURCE, msg);
OBJ_RELEASE(msg);
rc = MPI_Start(&r->recv_req);
if (MPI_SUCCESS != rc) {
MEMHEAP_ERROR("Failed to post recv request %d", rc);
@ -408,8 +418,6 @@ static int oshmem_mkey_recv_cb(void)
}
opal_list_append(&memheap_oob.req_list, &r->super);
do_recv(status.MPI_SOURCE, msg);
OBJ_RELEASE(msg);
r = (oob_comm_request_t *)opal_list_get_first(&memheap_oob.req_list);
assert(r);