1
1

- When calling ompi_mtl_portals_finalize while pml/ob1 is in use
   (i.e. without --mca pml cm), make sure PtlEQGet will actually work
   on ompi_mtl_portals.ptl_eq_h -- do so without adding code to
   ompi_mtl_portals_progress.

   Otherwise we abort() with:
[nid09979:32503] ompi_mtl_portals_finalize: Going to call ompi_mtl_portals_progress
[nid09979:32503]  Error returned from PtlEQGet.  Error code - 14
[nid09979:32502] Signal: Aborted (6)
[nid09979:32502] Signal code:  (-6) 

This commit was SVN r21761.
Этот коммит содержится в:
Rainer Keller
2009-08-04 22:48:07 +00:00
родитель 98bdf5d17b
Коммит 1bd94f2d98

Просмотреть файл

@ -69,13 +69,13 @@ ompi_mtl_portals_catchall_callback(ptl_event_t *ev,
ompi_mtl_portals_request_t *ptl_request) ompi_mtl_portals_request_t *ptl_request)
{ {
if (ptl_request == &catchall_send_request) { if (ptl_request == &catchall_send_request) {
opal_output(fileno(stderr),"ERROR - received catchall event on send queue"); opal_output(fileno(stderr), "ERROR - received catchall event on send queue");
} else if (ptl_request == &catchall_ack_request) { } else if (ptl_request == &catchall_ack_request) {
opal_output(fileno(stderr),"ERROR - received catchall event on ack queue"); opal_output(fileno(stderr), "ERROR - received catchall event on ack queue");
} else if (ptl_request == &catchall_read_request) { } else if (ptl_request == &catchall_read_request) {
opal_output(fileno(stderr),"ERROR - received catchall event on read queue"); opal_output(fileno(stderr), "ERROR - received catchall event on read queue");
} else { } else {
opal_output(fileno(stderr),"ERROR - received catchall event of unknown origin"); opal_output(fileno(stderr), "ERROR - received catchall event of unknown origin");
} }
abort(); abort();
@ -100,7 +100,7 @@ ompi_mtl_portals_add_procs(struct mca_mtl_base_module_t *mtl,
assert(mtl == &ompi_mtl_portals.base); assert(mtl == &ompi_mtl_portals.base);
/* if we havne't already initialized the network, do so now. We /* if we haven't already initialized the network, do so now. We
delay until add_procs because if we want the automatic runtime delay until add_procs because if we want the automatic runtime
environment setup the common code does for the utcp environment setup the common code does for the utcp
implementation, we can't do it until modex information can be implementation, we can't do it until modex information can be
@ -317,8 +317,17 @@ ompi_mtl_portals_finalize(struct mca_mtl_base_module_t *mtl)
/* Don't try to wait for things to finish if we've never initialized */ /* Don't try to wait for things to finish if we've never initialized */
if (PTL_INVALID_HANDLE != ompi_mtl_portals.ptl_ni_h) { if (PTL_INVALID_HANDLE != ompi_mtl_portals.ptl_ni_h) {
ptl_event_t ev;
int ret;
opal_progress_unregister(ompi_mtl_portals_progress); opal_progress_unregister(ompi_mtl_portals_progress);
while (0 != ompi_mtl_portals_progress()) { }
/* Before progressing remaining events, check whether we don't get PTL_EQ_INVALID */
ret = PtlEQPeek(ompi_mtl_portals.ptl_eq_h, &ev);
if (PTL_EQ_INVALID != ret) {
while (0 != ompi_mtl_portals_progress()) { }
}
} }
ompi_common_portals_ni_finalize(); ompi_common_portals_ni_finalize();
@ -347,7 +356,7 @@ ompi_mtl_portals_progress(void)
ret = ptl_request->event_callback(&ev, ptl_request); ret = ptl_request->event_callback(&ev, ptl_request);
if (OMPI_SUCCESS != ret) { if (OMPI_SUCCESS != ret) {
opal_output(0, " Error returned from the even callback. Error code - %d \n",ret); opal_output(0, " Error returned from the event callback. Error code - %d \n",ret);
abort(); abort();
} }
} }