1
1

Fix some issues raised in jsquyres' review.

1. Use asprintf instead of snprintf
2. Return remote_proc where possible.
3. Remove dead code.
4. Fix two comment typos.

This commit was SVN r24265.
Этот коммит содержится в:
Rolf vandeVaart 2011-01-19 16:09:17 +00:00
родитель 0e921bba7f
Коммит e75b86d3ab
2 изменённых файла: 25 добавлений и 22 удаления

Просмотреть файл

@ -3240,6 +3240,12 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
while((i = opal_list_remove_first(&to_send_frag(des)->coalesced_frags))) {
btl_ownership = (to_base_frag(i)->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
#if OMPI_OPENIB_FAILOVER_ENABLED
/* The check for the callback flag is only needed when running
* with the failover case because there is a chance that a fragment
* generated from a sendi call (which does not set the flag) gets
* coalesced. In normal operation, this cannot happen as the sendi
* call will never queue up a fragment which could potentially become
* a coalesced fragment. It will revert to a regular send. */
if (to_base_frag(i)->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
#endif
to_base_frag(i)->base.des_cbfunc(&openib_btl->super, endpoint,
@ -3393,7 +3399,7 @@ error:
#else
if(openib_btl)
openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL,
NULL, NULL);
remote_proc, NULL);
#endif
}

Просмотреть файл

@ -60,7 +60,7 @@ void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl,
ompi_proc_t* remote_proc,
mca_btl_openib_endpoint_t* endpoint)
{
char btlname[IBV_SYSFS_NAME_MAX];
char *btlname = NULL;
int btl_ownership;
/* Since this BTL supports failover, it will call the PML error handler
* function with the NONFATAL flag. If the PML is running with failover
@ -76,12 +76,6 @@ void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl,
* operation was done. The important information needs to be read
* from the fragment. */
/* Create a nice string to help with debug */
if (NULL != openib_btl) {
snprintf(btlname, IBV_SYSFS_NAME_MAX-1, "lid=%d:name=%s",
openib_btl->lid, openib_btl->device->ib_dev->name);
}
/* Cannot issue callback to SRQ errors because the shared receive
* queue is shared and is not specific to a connection. There is no
* way to figure out what type of message created the error because
@ -98,6 +92,12 @@ void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl,
}
assert(NULL != remote_proc);
/* Create a nice string to help with debug */
if (NULL != openib_btl) {
asprintf(&btlname, "lid=%d:name=%s",
openib_btl->lid, openib_btl->device->ib_dev->name);
}
/* The next set of errors are associated with an endpoint, but not
* with a PML descriptor. They are not associated with a PML
* descriptor because:
@ -124,6 +124,7 @@ void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl,
"MCA_BTL_OPENIG_FRAG=%d, "
"dropping since connection is broken (des=%lx)",
openib_frag_type(des), (long unsigned int) des);
if (NULL != btlname) free(btlname);
return;
}
@ -143,6 +144,7 @@ void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl,
* the same remote_proc argument will not actually map anything out. */
openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_NONFATAL,
remote_proc, btlname);
if (NULL != btlname) free(btlname);
/* Since we believe we have done a send, read or write, then the
* des_src fields should have valid data. */
@ -203,14 +205,6 @@ void mca_btl_openib_handle_endpoint_error(mca_btl_openib_module_t *openib_btl,
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
}
#if 0
/* Since a QP has an error, let us go ahead and drain off the
* broken fragments. This is not strictly necessary as we keep
* track of outstanding requests on any rendezvous requests. But,
* I think it makes sense so we will keep it here. */
progress_one_device(openib_btl->device);
#endif
/* There are several queues associated with an endpoint that may
* have some unsent fragments sitting in them. Remove them and
* call the callback functions with an error so the PML can send
@ -240,14 +234,15 @@ void mca_btl_openib_handle_btl_error(mca_btl_openib_module_t* openib_btl) {
if(mca_btl_openib_component.port_error_failover) {
/* Since we are not specifying a specific connection to bring down,
* the PML layer will map out the entire BTL for future communication. */
char btlname[IBV_SYSFS_NAME_MAX];
snprintf(btlname, IBV_SYSFS_NAME_MAX-1, "lid=%d:name=%s",
char *btlname = NULL;
asprintf(&btlname, "lid=%d:name=%s",
openib_btl->lid, openib_btl->device->ib_dev->name);
openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_NONFATAL,
NULL, btlname);
if (NULL != btlname) free(btlname);
/* Now send out messages to all endpoints that we are disconnecting.
* Only do ths to endpoints that are connected. Otherwise, the
* Only do this to endpoints that are connected. Otherwise, the
* remote side does not yet have the information on this endpoint. */
for (i = 0; i < opal_pointer_array_get_size(openib_btl->device->endpoints); i++) {
endpoint = (mca_btl_openib_endpoint_t*)
@ -281,7 +276,7 @@ void mca_btl_openib_handle_btl_error(mca_btl_openib_module_t* openib_btl) {
* one side of the connection actually sees the error. This means we
* can be left in a state where one side believes it has two BTLs, but
* the other side believes it only has one. This can cause problems.
* In the case of the EAGER_RDMA_ERROR, we elsewhere in the code what
* In the case of the EAGER_RDMA_ERROR, see elsewhere in the code what
* we are doing.
* @param ctl_hdr Pointer to the control header that was received
*/
@ -335,10 +330,10 @@ void btl_openib_handle_failover_control_messages(mca_btl_openib_control_header_t
if (MCA_BTL_IB_FAILED == newep->endpoint_state) {
return;
} else {
char btlname[IBV_SYSFS_NAME_MAX];
char *btlname = NULL;
ompi_proc_t* remote_proc = NULL;
snprintf(btlname, IBV_SYSFS_NAME_MAX-1, "lid=%d:name=%s",
asprintf(&btlname, "lid=%d:name=%s",
newbtl->lid, newbtl->device->ib_dev->name);
remote_proc = newep->endpoint_proc->proc_ompi;
@ -353,6 +348,8 @@ void btl_openib_handle_failover_control_messages(mca_btl_openib_control_header_t
newep->endpoint_state);
newbtl->error_cb(&newbtl->super, MCA_BTL_ERROR_FLAGS_NONFATAL,
remote_proc, btlname);
if (NULL != btlname) free(btlname);
error_out_all_pending_frags(newep, &newbtl->super, true);
newep->endpoint_state = MCA_BTL_IB_FAILED;
return;