1
1

Be a little more conservative about initializing devices and modules

(i.e., ensure that more data items get zeroed out/set to NULL) so that
if something goes wrong during initialization, we don't try to clean
up something that isn't there (and segv).

The chance of this happening on the trunk is very low (and will also
be low once the verbs improvements are brought over to v1.7).  But it
can actually happen in the v1.6 branch (e.g., if no CPC is available,
we'll try to get the length of the endpoints list, but the endpoints
list is NULL).  

Hence, even though the real goal is to get this functionality over to
v1.6, I figured I'd commit it to the trunk and CMR it to v1.7, just to
keep the openib BTL code common across all three branches where possible.

This commit was SVN r28317.
Этот коммит содержится в:
Jeff Squyres 2013-04-09 21:55:31 +00:00
родитель 975ff4675e
Коммит 8405975bf6
2 изменённых файла: 22 добавления и 18 удалений

Просмотреть файл

@ -1409,25 +1409,27 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl)
}
/* Release all QPs */
for (ep_index=0;
ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
ep_index++) {
endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
ep_index);
if(!endpoint) {
BTL_VERBOSE(("In finalize, got another null endpoint"));
continue;
}
if(endpoint->endpoint_btl != openib_btl) {
continue;
}
for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
openib_btl->device->eager_rdma_buffers[i] = NULL;
OBJ_RELEASE(endpoint);
if (NULL != openib_btl->device->endpoints) {
for (ep_index=0;
ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
ep_index++) {
endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
ep_index);
if(!endpoint) {
BTL_VERBOSE(("In finalize, got another null endpoint"));
continue;
}
if(endpoint->endpoint_btl != openib_btl) {
continue;
}
for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
openib_btl->device->eager_rdma_buffers[i] = NULL;
OBJ_RELEASE(endpoint);
}
}
OBJ_RELEASE(endpoint);
}
OBJ_RELEASE(endpoint);
}
/* Release SRQ resources */

Просмотреть файл

@ -775,7 +775,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
for(i = 0; i < mca_btl_openib_component.btls_per_lid; i++){
char param[40];
openib_btl = (mca_btl_openib_module_t *) malloc(sizeof(mca_btl_openib_module_t));
openib_btl = (mca_btl_openib_module_t *) calloc(1, sizeof(mca_btl_openib_module_t));
if(NULL == openib_btl) {
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
return OMPI_ERR_OUT_OF_RESOURCE;
@ -873,6 +873,8 @@ static void device_construct(mca_btl_openib_device_t *device)
device->ib_channel = NULL;
#endif
device->btls = 0;
device->endpoints = NULL;
device->device_btls = NULL;
device->ib_cq[BTL_OPENIB_HP_CQ] = NULL;
device->ib_cq[BTL_OPENIB_LP_CQ] = NULL;
device->cq_size[BTL_OPENIB_HP_CQ] = 0;