Be a little more conservative about initializing devices and modules
(i.e., ensure that more data items get zeroed out/set to NULL) so that if something goes wrong during initialization, we don't try to clean up something that isn't there (and segfault). The chance of this happening on the trunk is very low (and will also be low once the verbs improvements are brought over to v1.7). But it can actually happen in the v1.6 branch (e.g., if no CPC is available, we'll try to get the length of the endpoints list, but the endpoints list is NULL). Hence, even though the real goal is to get this functionality over to v1.6, I figured I'd commit to the trunk and CMR it to v1.7 just to try to keep commonality in the openib BTL between all three branches where possible. This commit was SVN r28317.
Этот коммит содержится в:
родитель
975ff4675e
Коммит
8405975bf6
@ -1409,25 +1409,27 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl)
|
||||
}
|
||||
|
||||
/* Release all QPs */
|
||||
for (ep_index=0;
|
||||
ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
|
||||
ep_index++) {
|
||||
endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
|
||||
ep_index);
|
||||
if(!endpoint) {
|
||||
BTL_VERBOSE(("In finalize, got another null endpoint"));
|
||||
continue;
|
||||
}
|
||||
if(endpoint->endpoint_btl != openib_btl) {
|
||||
continue;
|
||||
}
|
||||
for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
|
||||
if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
|
||||
openib_btl->device->eager_rdma_buffers[i] = NULL;
|
||||
OBJ_RELEASE(endpoint);
|
||||
if (NULL != openib_btl->device->endpoints) {
|
||||
for (ep_index=0;
|
||||
ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
|
||||
ep_index++) {
|
||||
endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
|
||||
ep_index);
|
||||
if(!endpoint) {
|
||||
BTL_VERBOSE(("In finalize, got another null endpoint"));
|
||||
continue;
|
||||
}
|
||||
if(endpoint->endpoint_btl != openib_btl) {
|
||||
continue;
|
||||
}
|
||||
for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
|
||||
if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
|
||||
openib_btl->device->eager_rdma_buffers[i] = NULL;
|
||||
OBJ_RELEASE(endpoint);
|
||||
}
|
||||
}
|
||||
OBJ_RELEASE(endpoint);
|
||||
}
|
||||
OBJ_RELEASE(endpoint);
|
||||
}
|
||||
|
||||
/* Release SRQ resources */
|
||||
|
@ -775,7 +775,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
|
||||
for(i = 0; i < mca_btl_openib_component.btls_per_lid; i++){
|
||||
char param[40];
|
||||
|
||||
openib_btl = (mca_btl_openib_module_t *) malloc(sizeof(mca_btl_openib_module_t));
|
||||
openib_btl = (mca_btl_openib_module_t *) calloc(1, sizeof(mca_btl_openib_module_t));
|
||||
if(NULL == openib_btl) {
|
||||
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
@ -873,6 +873,8 @@ static void device_construct(mca_btl_openib_device_t *device)
|
||||
device->ib_channel = NULL;
|
||||
#endif
|
||||
device->btls = 0;
|
||||
device->endpoints = NULL;
|
||||
device->device_btls = NULL;
|
||||
device->ib_cq[BTL_OPENIB_HP_CQ] = NULL;
|
||||
device->ib_cq[BTL_OPENIB_LP_CQ] = NULL;
|
||||
device->cq_size[BTL_OPENIB_HP_CQ] = 0;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user