btl/openib: fix locking bugs with XRC ib_addr lock
This commit fixes two issues with the ib_addr lock: - The ib_addr lock must always be obtained regardless of opal_using_threads() as the CPC is run in a separate thread. - The ib_addr lock is held in mca_btl_openib_endpoint_connected when calling back into the CPC start_connect on any pending connections. This will attempt to obtain the ib_addr lock again. Since this is not a performance-critical part of the code the lock has been changed to be recursive. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
4dc73d7765
Коммит
371df45bf8
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved.
|
||||
@ -579,7 +579,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
|
||||
|
||||
opal_output(-1, "Now we are CONNECTED");
|
||||
if (MCA_BTL_XRC_ENABLED) {
|
||||
OPAL_THREAD_LOCK(&endpoint->ib_addr->addr_lock);
|
||||
opal_mutex_lock (&endpoint->ib_addr->addr_lock);
|
||||
if (MCA_BTL_IB_ADDR_CONNECTED == endpoint->ib_addr->status) {
|
||||
/* We are not xrc master */
|
||||
/* set our qp pointer to master qp */
|
||||
@ -622,7 +622,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
|
||||
}
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock);
|
||||
opal_mutex_unlock (&endpoint->ib_addr->addr_lock);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
@ -5,6 +6,8 @@
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014 Bull SAS. All rights reserved.
|
||||
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -122,7 +125,10 @@ static void ib_address_constructor(ib_address_t *ib_addr)
|
||||
ib_addr->lid = 0;
|
||||
ib_addr->status = MCA_BTL_IB_ADDR_CLOSED;
|
||||
ib_addr->qp = NULL;
|
||||
OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_mutex_t);
|
||||
/* NTH: make the addr_lock recursive because mca_btl_openib_endpoint_connected can call
|
||||
* into the CPC with the lock held. The alternative would be to drop the lock but the
|
||||
* lock is never obtained in a critical path. */
|
||||
OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_recursive_mutex_t);
|
||||
OBJ_CONSTRUCT(&ib_addr->pending_ep, opal_list_t);
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user