Also show the "you might not have enough registered memory" warning
message earlier in the openib BTL startup sequence. This commit was SVN r21469.
Этот коммит содержится в:
родитель
771ce035a5
Коммит
c39998db17
@ -99,8 +99,8 @@ mca_btl_openib_module_t mca_btl_openib_module = {
|
|||||||
|
|
||||||
static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl);
|
static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl);
|
||||||
|
|
||||||
static void show_init_error(const char *file, int line,
|
void mca_btl_openib_show_init_error(const char *file, int line,
|
||||||
const char *func, const char *dev)
|
const char *func, const char *dev)
|
||||||
{
|
{
|
||||||
if (ENOMEM == errno) {
|
if (ENOMEM == errno) {
|
||||||
int ret;
|
int ret;
|
||||||
@ -166,15 +166,16 @@ static int adjust_cq(mca_btl_openib_device_t *device, const int cq)
|
|||||||
0);
|
0);
|
||||||
|
|
||||||
if (NULL == device->ib_cq[cq]) {
|
if (NULL == device->ib_cq[cq]) {
|
||||||
show_init_error(__FILE__, __LINE__, "ibv_create_cq",
|
mca_btl_openib_show_init_error(__FILE__, __LINE__, "ibv_create_cq",
|
||||||
ibv_get_device_name(device->ib_dev));
|
ibv_get_device_name(device->ib_dev));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if OPAL_ENABLE_PROGRESS_THREADS == 1
|
#if OPAL_ENABLE_PROGRESS_THREADS == 1
|
||||||
if(ibv_req_notify_cq(device->ib_cq[cq], 0)) {
|
if(ibv_req_notify_cq(device->ib_cq[cq], 0)) {
|
||||||
show_init_error(__FILE__, __LINE__, "ibv_req_notify_cq",
|
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
||||||
ibv_get_device_name(device->ib_dev));
|
"ibv_req_notify_cq",
|
||||||
|
ibv_get_device_name(device->ib_dev));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -236,8 +237,9 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
|
|||||||
ibv_create_srq(openib_btl->device->ib_pd, &attr);
|
ibv_create_srq(openib_btl->device->ib_pd, &attr);
|
||||||
}
|
}
|
||||||
if (NULL == openib_btl->qps[qp].u.srq_qp.srq) {
|
if (NULL == openib_btl->qps[qp].u.srq_qp.srq) {
|
||||||
show_init_error(__FILE__, __LINE__, "ibv_create_srq",
|
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
||||||
ibv_get_device_name(openib_btl->device->ib_dev));
|
"ibv_create_srq",
|
||||||
|
ibv_get_device_name(openib_btl->device->ib_dev));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -611,6 +611,13 @@ extern void mca_btl_openib_frag_progress_pending_put_get(
|
|||||||
extern int mca_btl_openib_ft_event(int state);
|
extern int mca_btl_openib_ft_event(int state);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Show an error during init, particularly when running out of
|
||||||
|
* registered memory.
|
||||||
|
*/
|
||||||
|
void mca_btl_openib_show_init_error(const char *file, int line,
|
||||||
|
const char *func, const char *dev);
|
||||||
|
|
||||||
#define BTL_OPENIB_HP_CQ 0
|
#define BTL_OPENIB_HP_CQ 0
|
||||||
#define BTL_OPENIB_LP_CQ 1
|
#define BTL_OPENIB_LP_CQ 1
|
||||||
|
|
||||||
|
@ -910,7 +910,7 @@ OBJ_CLASS_INSTANCE(mca_btl_openib_device_t, opal_object_t, device_construct,
|
|||||||
static int prepare_device_for_use(mca_btl_openib_device_t *device)
|
static int prepare_device_for_use(mca_btl_openib_device_t *device)
|
||||||
{
|
{
|
||||||
mca_btl_openib_frag_init_data_t *init_data;
|
mca_btl_openib_frag_init_data_t *init_data;
|
||||||
int qp, length;
|
int rc, qp, length;
|
||||||
|
|
||||||
#if OPAL_HAVE_THREADS
|
#if OPAL_HAVE_THREADS
|
||||||
if(mca_btl_openib_component.use_async_event_thread) {
|
if(mca_btl_openib_component.use_async_event_thread) {
|
||||||
@ -985,16 +985,25 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
|
|||||||
init_data->order = MCA_BTL_NO_ORDER;
|
init_data->order = MCA_BTL_NO_ORDER;
|
||||||
init_data->list = &device->send_free_control;
|
init_data->list = &device->send_free_control;
|
||||||
|
|
||||||
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(
|
rc = ompi_free_list_init_ex_new(&device->send_free_control,
|
||||||
&device->send_free_control,
|
|
||||||
sizeof(mca_btl_openib_send_control_frag_t), CACHE_LINE_SIZE,
|
sizeof(mca_btl_openib_send_control_frag_t), CACHE_LINE_SIZE,
|
||||||
OBJ_CLASS(mca_btl_openib_send_control_frag_t), length,
|
OBJ_CLASS(mca_btl_openib_send_control_frag_t), length,
|
||||||
mca_btl_openib_component.buffer_alignment,
|
mca_btl_openib_component.buffer_alignment,
|
||||||
mca_btl_openib_component.ib_free_list_num, -1,
|
mca_btl_openib_component.ib_free_list_num, -1,
|
||||||
mca_btl_openib_component.ib_free_list_inc,
|
mca_btl_openib_component.ib_free_list_inc,
|
||||||
device->mpool, mca_btl_openib_frag_init,
|
device->mpool, mca_btl_openib_frag_init,
|
||||||
init_data)) {
|
init_data);
|
||||||
return OMPI_ERROR;
|
if (OMPI_SUCCESS != rc) {
|
||||||
|
/* If we're "out of memory", this usually means that we ran
|
||||||
|
out of registered memory, so show that error message */
|
||||||
|
if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
|
||||||
|
OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
|
||||||
|
errno = ENOMEM;
|
||||||
|
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
||||||
|
"ompi_free_list_init_ex_new",
|
||||||
|
ibv_get_device_name(device->ib_dev));
|
||||||
|
}
|
||||||
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* setup all the qps */
|
/* setup all the qps */
|
||||||
@ -1010,7 +1019,7 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
|
|||||||
init_data->order = qp;
|
init_data->order = qp;
|
||||||
init_data->list = &device->qps[qp].send_free;
|
init_data->list = &device->qps[qp].send_free;
|
||||||
|
|
||||||
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(init_data->list,
|
rc = ompi_free_list_init_ex_new(init_data->list,
|
||||||
sizeof(mca_btl_openib_send_frag_t), CACHE_LINE_SIZE,
|
sizeof(mca_btl_openib_send_frag_t), CACHE_LINE_SIZE,
|
||||||
OBJ_CLASS(mca_btl_openib_send_frag_t), length,
|
OBJ_CLASS(mca_btl_openib_send_frag_t), length,
|
||||||
mca_btl_openib_component.buffer_alignment,
|
mca_btl_openib_component.buffer_alignment,
|
||||||
@ -1018,7 +1027,18 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
|
|||||||
mca_btl_openib_component.ib_free_list_max,
|
mca_btl_openib_component.ib_free_list_max,
|
||||||
mca_btl_openib_component.ib_free_list_inc,
|
mca_btl_openib_component.ib_free_list_inc,
|
||||||
device->mpool, mca_btl_openib_frag_init,
|
device->mpool, mca_btl_openib_frag_init,
|
||||||
init_data)) {
|
init_data);
|
||||||
|
if (OMPI_SUCCESS != rc) {
|
||||||
|
/* If we're "out of memory", this usually means that we
|
||||||
|
ran out of registered memory, so show that error
|
||||||
|
message */
|
||||||
|
if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
|
||||||
|
OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
|
||||||
|
errno = ENOMEM;
|
||||||
|
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
||||||
|
"ompi_free_list_init_ex_new",
|
||||||
|
ibv_get_device_name(device->ib_dev));
|
||||||
|
}
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user