usnic: fix endpoint destruction on the trunk
Fixes an assertion failure in --enable-debug builds and SEGVs in normal builds. I'm not 100% sure I like this model, but it at least seems to be consistent. Some variation on this scheme will need to be adapted to the trunk, where usnic_del_procs() is called by the PML instead of internally in usnic_finalize(). A related bug (but with different mechanics) is #4832. This commit was SVN r32424.
Этот коммит содержится в:
родитель
490c484f8c
Коммит
13b104bdef
@ -116,12 +116,6 @@ static void endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
|
|||||||
so it should be safe to unconditionally destruct the ack_li */
|
so it should be safe to unconditionally destruct the ack_li */
|
||||||
OBJ_DESTRUCT(&(endpoint->endpoint_ack_li));
|
OBJ_DESTRUCT(&(endpoint->endpoint_ack_li));
|
||||||
|
|
||||||
/* Remove this endpoint from module->all_endpoints list, then
|
|
||||||
destruct the list_item_t */
|
|
||||||
opal_mutex_lock(&endpoint->endpoint_module->all_endpoints_lock);
|
|
||||||
opal_list_remove_item(&endpoint->endpoint_module->all_endpoints,
|
|
||||||
&endpoint->endpoint_endpoint_li);
|
|
||||||
opal_mutex_unlock(&endpoint->endpoint_module->all_endpoints_lock);
|
|
||||||
OBJ_DESTRUCT(&(endpoint->endpoint_endpoint_li));
|
OBJ_DESTRUCT(&(endpoint->endpoint_endpoint_li));
|
||||||
|
|
||||||
if (endpoint->endpoint_hotel.rooms != NULL) {
|
if (endpoint->endpoint_hotel.rooms != NULL) {
|
||||||
@ -179,3 +173,12 @@ opal_btl_usnic_flush_endpoint(
|
|||||||
/* Now, ACK everything that is pending */
|
/* Now, ACK everything that is pending */
|
||||||
opal_btl_usnic_handle_ack(endpoint, endpoint->endpoint_next_seq_to_send-1);
|
opal_btl_usnic_handle_ack(endpoint, endpoint->endpoint_next_seq_to_send-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void opal_btl_usnic_release_endpoint(opal_btl_usnic_module_t *module,
|
||||||
|
opal_btl_usnic_endpoint_t *endpoint)
|
||||||
|
{
|
||||||
|
opal_mutex_lock(&module->all_endpoints_lock);
|
||||||
|
opal_list_remove_item(&module->all_endpoints, &endpoint->endpoint_endpoint_li);
|
||||||
|
opal_mutex_unlock(&module->all_endpoints_lock);
|
||||||
|
OBJ_RELEASE(endpoint);
|
||||||
|
}
|
||||||
|
@ -186,5 +186,11 @@ void
|
|||||||
opal_btl_usnic_flush_endpoint(
|
opal_btl_usnic_flush_endpoint(
|
||||||
opal_btl_usnic_endpoint_t *endpoint);
|
opal_btl_usnic_endpoint_t *endpoint);
|
||||||
|
|
||||||
|
/* Release the given endpoint and remove it from the all_endpoints list. The
|
||||||
|
* reference that is released was logically held by the all_endpoints list
|
||||||
|
* (this is not a generic release function). */
|
||||||
|
void opal_btl_usnic_release_endpoint(struct opal_btl_usnic_module_t *module,
|
||||||
|
opal_btl_usnic_endpoint_t *endpoint);
|
||||||
|
|
||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
#endif
|
#endif
|
||||||
|
@ -263,7 +263,7 @@ static int add_procs_create_ahs(opal_btl_usnic_module_t *module,
|
|||||||
EHOSTUNREACH == errno) {
|
EHOSTUNREACH == errno) {
|
||||||
add_procs_warn_ah_fail(module, endpoints[i]);
|
add_procs_warn_ah_fail(module, endpoints[i]);
|
||||||
|
|
||||||
OBJ_RELEASE(endpoints[i]);
|
opal_btl_usnic_release_endpoint(module, endpoints[i]);
|
||||||
endpoints[i] = NULL;
|
endpoints[i] = NULL;
|
||||||
--num_ah_left;
|
--num_ah_left;
|
||||||
}
|
}
|
||||||
@ -317,7 +317,7 @@ static int add_procs_create_ahs(opal_btl_usnic_module_t *module,
|
|||||||
if (NULL != endpoints[i]) {
|
if (NULL != endpoints[i]) {
|
||||||
if (OPAL_SUCCESS != ret ||
|
if (OPAL_SUCCESS != ret ||
|
||||||
NULL == endpoints[i]->endpoint_remote_ah) {
|
NULL == endpoints[i]->endpoint_remote_ah) {
|
||||||
OBJ_RELEASE(endpoints[i]);
|
opal_btl_usnic_release_endpoint(module, endpoints[i]);
|
||||||
endpoints[i] = NULL;
|
endpoints[i] = NULL;
|
||||||
} else {
|
} else {
|
||||||
++num_created;
|
++num_created;
|
||||||
@ -402,7 +402,7 @@ static int usnic_add_procs(struct mca_btl_base_module_t* base_module,
|
|||||||
reachable. */
|
reachable. */
|
||||||
for (size_t i = 0; i < nprocs; ++i) {
|
for (size_t i = 0; i < nprocs; ++i) {
|
||||||
if (NULL != endpoints[i]) {
|
if (NULL != endpoints[i]) {
|
||||||
OBJ_RELEASE(endpoints[i]);
|
opal_btl_usnic_release_endpoint(module, endpoints[i]);
|
||||||
endpoints[i] = NULL;
|
endpoints[i] = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -451,7 +451,7 @@ static int usnic_del_procs(struct mca_btl_base_module_t *base_module,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* We're all done with this endpoint */
|
/* We're all done with this endpoint */
|
||||||
OBJ_RELEASE(endpoint);
|
opal_btl_usnic_release_endpoint(module, endpoint);
|
||||||
|
|
||||||
break; /* done once we found match */
|
break; /* done once we found match */
|
||||||
}
|
}
|
||||||
|
@ -362,7 +362,7 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
|
|||||||
|
|
||||||
/* if endpoint exiting, and all ACKs received, release the endpoint */
|
/* if endpoint exiting, and all ACKs received, release the endpoint */
|
||||||
if (endpoint->endpoint_exiting && ENDPOINT_DRAINED(endpoint)) {
|
if (endpoint->endpoint_exiting && ENDPOINT_DRAINED(endpoint)) {
|
||||||
OBJ_RELEASE(endpoint);
|
opal_btl_usnic_release_endpoint(module, endpoint);
|
||||||
}
|
}
|
||||||
repost_no_endpoint:
|
repost_no_endpoint:
|
||||||
++module->stats.num_recv_reposts;
|
++module->stats.num_recv_reposts;
|
||||||
|
@ -334,7 +334,7 @@ opal_btl_usnic_recv_frag_bookkeeping(
|
|||||||
repost:
|
repost:
|
||||||
/* if endpoint exiting, and all ACKs received, release the endpoint */
|
/* if endpoint exiting, and all ACKs received, release the endpoint */
|
||||||
if (endpoint->endpoint_exiting && ENDPOINT_DRAINED(endpoint)) {
|
if (endpoint->endpoint_exiting && ENDPOINT_DRAINED(endpoint)) {
|
||||||
OBJ_RELEASE(endpoint);
|
opal_btl_usnic_release_endpoint(module, endpoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
++module->stats.num_recv_reposts;
|
++module->stats.num_recv_reposts;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user