
Adding a hash table for managing dependencies between SRQs and their BTL modules.

This commit was SVN r22653.
This commit is contained in:
Pavel Shamis 2010-02-18 09:48:16 +00:00
parent 2be03b4fb6
commit a124f6b10b
6 changed files with 120 additions and 35 deletions
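
For orientation before the per-file hunks: the new srq_manager in mca_btl_openib_component_t is a mutex-protected opal_hash_table_t keyed by the pointer-sized SRQ handle, mapping each SRQ to the BTL module that owns it. create_srq() registers the pair, finalize_resources() removes it, and the async event thread looks the module back up when an IBV_EVENT_SRQ_LIMIT_REACHED event arrives. Below is a minimal sketch of that register/lookup pattern using the opal_hash_table_* and opal_mutex_* calls that appear in the diff; the helper names and include paths are illustrative assumptions, not part of the commit (which also guards this code with #if OPAL_HAVE_THREADS).

/* Sketch only: hypothetical helpers around the srq_manager hash table. */
#include "opal/class/opal_hash_table.h"   /* opal_hash_table_* */
#include "opal/threads/mutex.h"           /* opal_mutex_lock/unlock */
#include "btl_openib.h"                   /* mca_btl_openib_component, module types */

static int srq_manager_register(mca_btl_openib_module_t *openib_btl, int qp)
{
    opal_mutex_t *lock = &mca_btl_openib_component.srq_manager.lock;
    opal_hash_table_t *table =
        &mca_btl_openib_component.srq_manager.srq_addr_table;
    int rc;

    opal_mutex_lock(lock);
    /* key = the bytes of the struct ibv_srq* handle, value = owning module */
    rc = opal_hash_table_set_value_ptr(table,
             &openib_btl->qps[qp].u.srq_qp.srq, sizeof(struct ibv_srq *),
             (void *) openib_btl);
    opal_mutex_unlock(lock);
    return (OPAL_SUCCESS == rc) ? OMPI_SUCCESS : OMPI_ERROR;
}

static mca_btl_openib_module_t *srq_manager_lookup(struct ibv_srq *srq)
{
    mca_btl_openib_module_t *openib_btl = NULL;
    opal_mutex_t *lock = &mca_btl_openib_component.srq_manager.lock;
    opal_hash_table_t *table =
        &mca_btl_openib_component.srq_manager.srq_addr_table;

    opal_mutex_lock(lock);
    /* A miss leaves openib_btl NULL: the SRQ was already destroyed and
     * the async handler simply ignores the event. */
    (void) opal_hash_table_get_value_ptr(table, &srq,
               sizeof(struct ibv_srq *), (void *) &openib_btl);
    opal_mutex_unlock(lock);
    return openib_btl;
}

Note that the key is the pointer value itself (key_size is sizeof(struct ibv_srq *)), so registration can pass the address of the handle field inside the module while the event handler passes the address of its local copy of the same handle; only the pointer bytes are hashed and compared.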

View file

@@ -328,6 +328,24 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
return OMPI_ERROR;
}
#if OPAL_HAVE_THREADS
{
opal_mutex_t *lock = &mca_btl_openib_component.srq_manager.lock;
opal_hash_table_t *srq_addr_table = &mca_btl_openib_component.srq_manager.srq_addr_table;
opal_mutex_lock(lock);
if (OPAL_SUCCESS != opal_hash_table_set_value_ptr(
srq_addr_table, &openib_btl->qps[qp].u.srq_qp.srq,
sizeof(struct ibv_srq*), (void*) openib_btl)) {
BTL_ERROR(("SRQ Internal error."
" Failed to add element to mca_btl_openib_component.srq_manager.srq_addr_table\n"));
opal_mutex_unlock(lock);
return OMPI_ERROR;
}
opal_mutex_unlock(lock);
}
#endif
rd_num = mca_btl_openib_component.qp_infos[qp].rd_num;
rd_curr_num = openib_btl->qps[qp].u.srq_qp.rd_curr_num = mca_btl_openib_component.qp_infos[qp].u.srq_qp.rd_init;
@@ -1260,17 +1278,36 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl)
/* Release SRQ resources */
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
if(!BTL_OPENIB_QP_TYPE_PP(qp)) {
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
if (NULL != openib_btl->qps[qp].u.srq_qp.srq &&
ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)){
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
if (NULL != openib_btl->qps[qp].u.srq_qp.srq) {
#if OPAL_HAVE_THREADS
opal_mutex_t *lock =
&mca_btl_openib_component.srq_manager.lock;
opal_hash_table_t *srq_addr_table =
&mca_btl_openib_component.srq_manager.srq_addr_table;
opal_mutex_lock(lock);
if (OPAL_SUCCESS !=
opal_hash_table_remove_value_ptr(srq_addr_table,
&openib_btl->qps[qp].u.srq_qp.srq,
sizeof(struct ibv_srq *))) {
BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp));
rc = OMPI_ERROR;
}
opal_mutex_unlock(lock);
#endif
if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) {
BTL_VERBOSE(("Failed to close SRQ %d", qp));
rc = OMPI_ERROR;
}
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
}
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
}
}
@@ -1288,6 +1325,10 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl)
OBJ_RELEASE(openib_btl->device);
}
if (NULL != openib_btl->qps) {
free(openib_btl->qps);
}
return rc;
}

View file

@@ -136,6 +136,17 @@ typedef enum {
BTL_OPENIB_DT_ALL
} btl_openib_device_type_t;
#if OPAL_HAVE_THREADS
/* The structure for managing all BTL SRQs */
typedef struct mca_btl_openib_srq_manager_t {
opal_mutex_t lock;
/* The keys of this hash table are addresses of
SRQ structures, and the elements are pointers to
the BTL modules associated with these SRQs */
opal_hash_table_t srq_addr_table;
} mca_btl_openib_srq_manager_t;
#endif
struct mca_btl_openib_component_t {
mca_btl_base_component_2_0_0_t super; /**< base BTL component */
@@ -217,6 +228,7 @@ struct mca_btl_openib_component_t {
int async_comp_pipe[2]; /**< Pipe for async thread communication with main thread */
pthread_t async_thread; /**< Async thread that will handle fatal errors */
uint32_t use_async_event_thread; /**< Use the async event handler */
mca_btl_openib_srq_manager_t srq_manager; /**< Hash table for all BTL SRQs */
#endif
btl_openib_device_type_t device_type;
char *if_include;

View file

@@ -231,10 +231,22 @@ static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_p
the number of WQEs that we post = rd_curr_num < rd_num, and this value is
increased (as needed) in the IBV_EVENT_SRQ_LIMIT_REACHED event handler (i.e. in this function);
the event is thrown by the device if the number of WQEs in the SRQ drops below srq_limit */
static int btl_openib_async_srq_limit_event(struct ibv_srq* srq,
mca_btl_openib_module_t *openib_btl)
static int btl_openib_async_srq_limit_event(struct ibv_srq* srq)
{
int qp;
int qp, rc = OMPI_SUCCESS;
mca_btl_openib_module_t *openib_btl = NULL;
opal_mutex_t *lock = &mca_btl_openib_component.srq_manager.lock;
opal_hash_table_t *srq_addr_table = &mca_btl_openib_component.srq_manager.srq_addr_table;
opal_mutex_lock(lock);
if (OPAL_SUCCESS != opal_hash_table_get_value_ptr(srq_addr_table,
&srq, sizeof(struct ibv_srq*), (void*) &openib_btl)) {
/* If there is no element with this key in the table,
we assume the SRQ was destroyed and do not handle the event */
goto srq_limit_event_exit;
}
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
if (!BTL_OPENIB_QP_TYPE_PP(qp)) {
@@ -245,34 +257,36 @@ static int btl_openib_async_srq_limit_event(struct ibv_srq* srq,
}
if(qp >= mca_btl_openib_component.num_qps) {
orte_show_help("help-mpi-btl-openib.txt", "SRQ not found",
true,orte_process_info.nodename,
ibv_get_device_name(openib_btl->device->ib_dev));
return OMPI_ERROR;
BTL_ERROR(("Open MPI tried to access a shared receive queue (SRQ) on the device %s that was not found. This should not happen, and is a fatal error. Your MPI job will now abort.\n", ibv_get_device_name(openib_btl->device->ib_dev)));
rc = OMPI_ERROR;
goto srq_limit_event_exit;
}
/* dynamically re-size the SRQ to be larger */
openib_btl->qps[qp].u.srq_qp.rd_curr_num <<= 1;
if(openib_btl->qps[qp].u.srq_qp.rd_curr_num >= mca_btl_openib_component.qp_infos[qp].rd_num) {
if(openib_btl->qps[qp].u.srq_qp.rd_curr_num >=
mca_btl_openib_component.qp_infos[qp].rd_num) {
openib_btl->qps[qp].u.srq_qp.rd_curr_num = mca_btl_openib_component.qp_infos[qp].rd_num;
openib_btl->qps[qp].u.srq_qp.rd_low_local = mca_btl_openib_component.qp_infos[qp].rd_low;
openib_btl->qps[qp].u.srq_qp.srq_limit_event_flag = false;
return OMPI_SUCCESS;
goto srq_limit_event_exit;
}
openib_btl->qps[qp].u.srq_qp.rd_low_local <<= 1;
openib_btl->qps[qp].u.srq_qp.srq_limit_event_flag = true;
return OMPI_SUCCESS;
srq_limit_event_exit:
opal_mutex_unlock(lock);
return rc;
}
/* Function handle async device events */
static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_poll, int index)
{
int j, btl_index = 0;
int j;
mca_btl_openib_device_t *device = NULL;
struct ibv_async_event event;
bool xrc_event = false;
@@ -283,8 +297,6 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
if (mca_btl_openib_component.openib_btls[j]->device->ib_dev_context->async_fd ==
devices_poll->async_pollfd[index].fd ) {
device = mca_btl_openib_component.openib_btls[j]->device;
btl_index = j;
break;
}
}
@@ -355,8 +367,8 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
/* The event is signaled when the number of pre-posted receive WQEs drops
below the predefined threshold, srq_limit */
case IBV_EVENT_SRQ_LIMIT_REACHED:
if(OMPI_SUCCESS != btl_openib_async_srq_limit_event(event.element.srq,
mca_btl_openib_component.openib_btls[btl_index])) {
if(OMPI_SUCCESS !=
btl_openib_async_srq_limit_event(event.element.srq)) {
return OMPI_ERROR;
}

View file

@@ -137,6 +137,15 @@ int btl_openib_component_open(void)
{
int ret;
#if OPAL_HAVE_THREADS
opal_mutex_t *lock = &mca_btl_openib_component.srq_manager.lock;
opal_hash_table_t *srq_addr_table = &mca_btl_openib_component.srq_manager.srq_addr_table;
/* Construct hash table that stores pointers to SRQs */
OBJ_CONSTRUCT(lock, opal_mutex_t);
OBJ_CONSTRUCT(srq_addr_table, opal_hash_table_t);
#endif
/* initialize state */
mca_btl_openib_component.ib_num_btls = 0;
mca_btl_openib_component.openib_btls = NULL;
@@ -188,6 +197,9 @@ static int btl_openib_component_close(void)
close(mca_btl_openib_component.async_comp_pipe[0]);
close(mca_btl_openib_component.async_comp_pipe[1]);
}
OBJ_DESTRUCT(&mca_btl_openib_component.srq_manager.lock);
OBJ_DESTRUCT(&mca_btl_openib_component.srq_manager.srq_addr_table);
#endif
ompi_btl_openib_connect_base_finalize();
@@ -2643,6 +2655,19 @@ btl_openib_component_init(int *num_btl_modules,
if (OMPI_SUCCESS != setup_qps()) {
goto no_btls;
}
#if OPAL_HAVE_THREADS
if (mca_btl_openib_component.num_srq_qps > 0 ||
mca_btl_openib_component.num_xrc_qps > 0) {
opal_hash_table_t *srq_addr_table = &mca_btl_openib_component.srq_manager.srq_addr_table;
if(OPAL_SUCCESS != opal_hash_table_init(
srq_addr_table, (mca_btl_openib_component.num_srq_qps +
mca_btl_openib_component.num_xrc_qps) *
mca_btl_openib_component.ib_num_btls)) {
BTL_ERROR(("SRQ internal error. Failed to allocate SRQ addr hash table"));
goto no_btls;
}
}
#endif
/* For XRC:
* from this point we know if MCA_BTL_XRC_ENABLED it true or false */

View file

@@ -163,11 +163,6 @@ int btl_openib_register_mca_params(void)
1, &ival, 0));
mca_btl_openib_component.warn_nonexistent_if = (0 != ival);
CHECK(reg_int("enable_srq_resize", NULL,
"Enable/Disable on demand SRQ resize. "
"(0 = without resizing, nonzero = with resizing)", 1, &ival, 0));
mca_btl_openib_component.enable_srq_resize = (0 != ival);
if (OMPI_HAVE_IBV_FORK_INIT) {
ival2 = -1;
} else {
@@ -455,6 +450,13 @@ int btl_openib_register_mca_params(void)
"If nonzero, use the thread that will handle InfiniBand asynchronous events",
1, &ival, 0));
mca_btl_openib_component.use_async_event_thread = (0 != ival);
CHECK(reg_int("enable_srq_resize", NULL,
"Enable/Disable on demand SRQ resize. "
"(0 = without resizing, nonzero = with resizing)", 1, &ival, 0));
mca_btl_openib_component.enable_srq_resize = (0 != ival);
#else
mca_btl_openib_component.enable_srq_resize = 0;
#endif
CHECK(reg_int("buffer_alignment", NULL,

View file

@@ -168,13 +168,6 @@ peer to which it was connected:
You may need to consult with your system administrator to get this
problem fixed.
#
[SRQ not found]
Open MPI tried to access a shared receive queue (SRQ) that was not found.
This should not happen, and is a fatal error. Your MPI job will now abort.
Local host: %s
Local device: %s
#
[srq rnr retry exceeded]
The OpenFabrics "receiver not ready" retry count on a shared receive
queue or XRC receive queue has been exceeded. This error can occur if