diff --git a/ompi/mca/pml/ob1/pml_ob1.h b/ompi/mca/pml/ob1/pml_ob1.h
index bc066202eb..a564e90d26 100644
--- a/ompi/mca/pml/ob1/pml_ob1.h
+++ b/ompi/mca/pml/ob1/pml_ob1.h
@@ -208,6 +208,14 @@ extern int mca_pml_ob1_start( size_t count,
 
 extern int mca_pml_ob1_ft_event( int state );
 
+/**
+ * We use these to hold onto traditionally allocated
+ * requests in order to allow the parallel debugger full access to the
+ * message queues (instead of allocating the requests on the stack).
+ */
+extern struct mca_pml_ob1_recv_request_t *mca_pml_ob1_recvreq;
+extern struct mca_pml_ob1_send_request_t *mca_pml_ob1_sendreq;
+
 END_C_DECLS
 
 struct mca_pml_ob1_pckt_pending_t {
diff --git a/ompi/mca/pml/ob1/pml_ob1_component.c b/ompi/mca/pml/ob1/pml_ob1_component.c
index 309c51cdf8..2f37468ee2 100644
--- a/ompi/mca/pml/ob1/pml_ob1_component.c
+++ b/ompi/mca/pml/ob1/pml_ob1_component.c
@@ -304,6 +304,15 @@ int mca_pml_ob1_component_fini(void)
     OBJ_DESTRUCT(&mca_pml_ob1.lock);
     OBJ_DESTRUCT(&mca_pml_ob1.send_ranges);
 
+    if( NULL != mca_pml_ob1_recvreq ) {
+        OBJ_DESTRUCT(mca_pml_ob1_recvreq);
+        mca_pml_ob1_recvreq = NULL;
+    }
+    if( NULL != mca_pml_ob1_sendreq ) {
+        OBJ_DESTRUCT(mca_pml_ob1_sendreq);
+        mca_pml_ob1_sendreq = NULL;
+    }
+
     if( NULL != mca_pml_ob1.allocator ) {
         (void)mca_pml_ob1.allocator->alc_finalize(mca_pml_ob1.allocator);
         mca_pml_ob1.allocator = NULL;
diff --git a/ompi/mca/pml/ob1/pml_ob1_irecv.c b/ompi/mca/pml/ob1/pml_ob1_irecv.c
index 88174dd8c8..d06fe09c10 100644
--- a/ompi/mca/pml/ob1/pml_ob1_irecv.c
+++ b/ompi/mca/pml/ob1/pml_ob1_irecv.c
@@ -32,6 +32,8 @@
 #include <alloca.h>
 #endif /* HAVE_ALLOCA_H */
 
+mca_pml_ob1_recv_request_t *mca_pml_ob1_recvreq = NULL;
+
 int mca_pml_ob1_irecv_init(void *addr,
                            size_t count,
                            ompi_datatype_t * datatype,
@@ -92,10 +94,15 @@ int mca_pml_ob1_recv(void *addr,
                      struct ompi_communicator_t *comm,
                      ompi_status_public_t * status)
 {
-    mca_pml_ob1_recv_request_t *recvreq =
-        alloca(mca_pml_base_recv_requests.fl_frag_size);
+    mca_pml_ob1_recv_request_t *recvreq = mca_pml_ob1_recvreq;
     int rc;
 
+    if( NULL == recvreq ) {
+        MCA_PML_OB1_RECV_REQUEST_ALLOC(recvreq);
+        if (NULL == recvreq)
+            return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
+        mca_pml_ob1_recvreq = recvreq;
+    }
     OBJ_CONSTRUCT(recvreq, mca_pml_ob1_recv_request_t);
 
     MCA_PML_OB1_RECV_REQUEST_INIT(recvreq, addr, count, datatype,
diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c
index 157cddd730..3093e70bb8 100644
--- a/ompi/mca/pml/ob1/pml_ob1_isend.c
+++ b/ompi/mca/pml/ob1/pml_ob1_isend.c
@@ -32,6 +32,8 @@
 #include <alloca.h>
 #endif /* HAVE_ALLOCA_H */
 
+mca_pml_ob1_send_request_t *mca_pml_ob1_sendreq = NULL;
+
 int mca_pml_ob1_isend_init(void *buf,
                            size_t count,
                            ompi_datatype_t * datatype,
@@ -181,8 +183,7 @@ int mca_pml_ob1_send(void *buf,
     ompi_proc_t *dst_proc = ompi_comm_peer_lookup (comm, dst);
     mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
                                         dst_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
-    mca_pml_ob1_send_request_t *sendreq =
-        alloca(mca_pml_base_send_requests.fl_frag_size);
+    mca_pml_ob1_send_request_t *sendreq = mca_pml_ob1_sendreq;
     int16_t seqn;
     int rc;
 
@@ -206,6 +207,11 @@ int mca_pml_ob1_send(void *buf,
 
     seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_comm->procs[dst].send_sequence, 1);
 
+    /**
+     * The immediate send will not have a request, so it is
+     * untraceable from the point of view of any debugger attached to
+     * the parallel application.
+     */
     if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) {
         rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc,
                                       endpoint, comm);
@@ -214,6 +220,13 @@ int mca_pml_ob1_send(void *buf,
         }
     }
 
+    if( NULL == sendreq ) {
+        MCA_PML_OB1_SEND_REQUEST_ALLOC(comm, dst, sendreq);
+        if (NULL == sendreq)
+            return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
+        mca_pml_ob1_sendreq = sendreq;
+    }
+
    OBJ_CONSTRUCT(sendreq, mca_pml_ob1_send_request_t);
    sendreq->req_send.req_base.req_proc = dst_proc;
    sendreq->rdma_frag = NULL;
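
Note on the pattern: the patch replaces the alloca()'d per-call request in the blocking send/recv fast paths with a lazily allocated, process-global one, so a debugger inspecting the message queues can actually find it. Below is a minimal C sketch of that allocate-once / reuse / destroy-at-finalize pattern. The request_t type, the blocking_call() and component_fini() names, and the use of plain malloc/free in place of the MCA_PML_OB1_*_REQUEST_ALLOC, OBJ_CONSTRUCT, and OBJ_DESTRUCT machinery are illustrative stand-ins, not the actual Open MPI implementation.

#include <stdlib.h>

/* Illustrative stand-in for mca_pml_ob1_recv_request_t. */
typedef struct { int in_use; } request_t;

/* Process-global request pointer. Unlike an alloca()'d request, this
 * address is reachable from the process's global symbols, so a
 * debugger walking the message queues can see the pending request. */
static request_t *global_req = NULL;

static int blocking_call(void)
{
    request_t *req = global_req;

    if (NULL == req) {                /* first call: allocate lazily */
        req = malloc(sizeof(*req));
        if (NULL == req)
            return -1;                /* temporarily out of resources */
        global_req = req;             /* cache for every later call */
    }
    req->in_use = 1;                  /* (re)initialize per call */
    /* ... post the operation and progress it to completion ... */
    req->in_use = 0;
    return 0;
}

static void component_fini(void)
{
    if (NULL != global_req) {         /* release once, at finalize */
        free(global_req);
        global_req = NULL;
    }
}

int main(void)
{
    blocking_call();                  /* allocates on first use */
    blocking_call();                  /* reuses the cached request */
    component_fini();
    return 0;
}

A design caveat of this pattern in general: a single process-global request is only safe when blocking calls cannot be issued concurrently from multiple threads; a thread-safe variant would need per-thread storage or locking, traded here against message-queue visibility for the debugger.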